diff --git a/sei-db/db_engine/litt/README.md b/sei-db/db_engine/litt/README.md index 4a3740330a..190e98227f 100644 --- a/sei-db/db_engine/litt/README.md +++ b/sei-db/db_engine/litt/README.md @@ -233,18 +233,13 @@ the [value](#value) associated with a [key](#key) can be retrieved from disk. -An address is encoded in a 64-bit integer. It contains two pieces of information: +An address is a 13-byte structure. It contains four pieces of information: - the [segment](#segment) [index](#segment-index) where the [value](#value) is stored +- the [shard](#shard) within that segment that holds the [value](#value) - the offset within the [value file](#segment-value-files) where the first byte of the [value](#value) is stored +- the length of the [value](#value) in bytes -This information is not enough by itself to retrieve the [value](#value) from disk if there is more than one -[shard](#shard) in the [table](#table). When there is more than one [shard](#shard), the following information -must also be known in order to retrieve the [value](#value) (i.e. to figure out which [shard](#shard) to look in): - -- the [sharding factor](#sharding-factor) for the [segment](#segment) where the [value](#value) is stored - (stored in the [segment metadata file](#segment-metadata-file)) -- the [sharding salt](#sharding-salt) for the [table](#table) where the [value](#value) is stored - (stored in the [table metadata file](#table-metadata-file)) -- the [key](#key) that the [value](#value) is associated with +All four pieces are packed into the address itself, so retrieving a [value](#value) is a self-contained +operation that does not need to consult any segment-level metadata or recompute anything from the [key](#key). 
## Atomicity @@ -432,7 +427,6 @@ Each metadata contains the following information: - the [segment index](#segment-index) - serialization version (in case the format changes in the future) - the [sharding factor](#sharding-factor) for the segment -- the [salt](#sharding-salt) used for the segment - the [timestamp](#segment-timestamp) of the last element written in the segment. the [TTL](#ttl) of any data contained within it. - whether or not the segment is [immutable](#segment-mutability) @@ -462,26 +456,22 @@ The file name of a value file is `X-Y.values`, where `X` is the [segment index]( LittDB supports sharding. That is to say, it can break the data into smaller pieces and spread those pieces across multiple locations. -In order to determine the shard that a particular [key](#key) is in, a hash function is used. The data that goes -into the hash function is the [key](#key) itself, as well as a [sharding salt](#sharding-salt) that is unique to -each [segment](#segment). +Within a [segment](#segment), [values](#value) are assigned to shards in round-robin order at write time: the first +write goes to shard 0, the second to shard 1, and so on, wrapping around once every shard has been used. Each +[value's](#value) shard is recorded in its [address](#address), so reads do not need to recompute the assignment. -The [sharding salt](#sharding-salt) is chosen randomly. Its purpose is to make the mapping between [keys](#key) and -shards unpredictable to an outside attacker. Without this sort of randomness, an attacker could intentionally craft -keys that all map to the same shard, causing a hot spot in the database and potentially degrading performance. +This scheme produces a perfectly even distribution of [values](#value) across shards regardless of the +[keys](#key) being written. 
As a side benefit, an outside attacker cannot craft a sequence of [keys](#key) that +all land in the same shard, since the shard chosen for a given [value](#value) depends only on the order in which +it was written, not on the contents of its [key](#key). ### Sharding Factor -The number of [shards](#shard) in a [segment](#segment) is called the "sharding factor". The sharding factor must be -a positive, non-zero integer. The sharding factor can be changed at runtime without restarting the database or +The number of [shards](#shard) in a [segment](#segment) is called the "sharding factor". The sharding factor must +be a positive, non-zero integer no larger than 256 (the limit imposed by encoding the shard ID as a single byte +inside the [address](#address)). The sharding factor can be changed at runtime without restarting the database or performing a data migration. -### Sharding Salt - -A random number chosen to make the [shard](#shard) hash function unpredictable to an outside attacker. This number -does not need to be chosen via a cryptographically secure random number generator, as long as it is not publicly -known. - ## Table A table in LittDB is a unique namespace. Two [keys](#key) with identical values do not conflict with each other as diff --git a/sei-db/db_engine/litt/cli/prune_test.go b/sei-db/db_engine/litt/cli/prune_test.go index dcd5d88648..0432dd4938 100644 --- a/sei-db/db_engine/litt/cli/prune_test.go +++ b/sei-db/db_engine/litt/cli/prune_test.go @@ -114,10 +114,10 @@ func TestPrune(t *testing.T) { seg := segments[i] metadataPath := seg.GetMetadataFilePath() - // Overwrite the old metadata file. The timestamp is encoded at [24:32] in nanoseconds since the epoch. + // Overwrite the old metadata file. The timestamp is encoded at [8:16] in nanoseconds since the epoch. 
data, err := os.ReadFile(metadataPath) require.NoError(t, err) - binary.BigEndian.PutUint64(data[24:32], sixHoursAgo) + binary.BigEndian.PutUint64(data[8:16], sixHoursAgo) // write the modified metadata file back to disk. err = os.WriteFile(metadataPath, data, 0644) @@ -278,10 +278,10 @@ func TestPruneSubset(t *testing.T) { seg := segments[i] metadataPath := seg.GetMetadataFilePath() - // Overwrite the old metadata file. The timestamp is encoded at [24:32] in nanoseconds since the epoch. + // Overwrite the old metadata file. The timestamp is encoded at [8:16] in nanoseconds since the epoch. data, err := os.ReadFile(metadataPath) require.NoError(t, err) - binary.BigEndian.PutUint64(data[24:32], sixHoursAgo) + binary.BigEndian.PutUint64(data[8:16], sixHoursAgo) // write the modified metadata file back to disk. err = os.WriteFile(metadataPath, data, 0644) diff --git a/sei-db/db_engine/litt/disktable/control_loop.go b/sei-db/db_engine/litt/disktable/control_loop.go index deb3123ed3..0cdf6c2782 100644 --- a/sei-db/db_engine/litt/disktable/control_loop.go +++ b/sei-db/db_engine/litt/disktable/control_loop.go @@ -5,7 +5,6 @@ package disktable import ( "fmt" "log/slog" - "math/rand" "sync" "sync/atomic" "time" @@ -84,9 +83,6 @@ type controlLoop struct { // The table's metadata. metadata *tableMetadata - // A source of randomness used for generating sharding salt. - saltShaker *rand.Rand - // whether fsync mode is enabled. fsync bool @@ -320,11 +316,6 @@ func (c *controlLoop) expandSegments() error { c.immutableSegmentSize += c.segments[c.highestSegmentIndex].Size() // Create a new segment. 
- salt := [16]byte{} - _, err = c.saltShaker.Read(salt[:]) - if err != nil { - return fmt.Errorf("failed to read salt: %w", err) - } newSegment, err := segment.CreateSegment( c.logger, c.errorMonitor, @@ -332,7 +323,6 @@ func (c *controlLoop) expandSegments() error { c.segmentPaths, c.snapshottingEnabled, c.metadata.GetShardingFactor(), - salt, c.fsync) if err != nil { return err diff --git a/sei-db/db_engine/litt/disktable/disk_table.go b/sei-db/db_engine/litt/disktable/disk_table.go index 1d759b1564..15d478d470 100644 --- a/sei-db/db_engine/litt/disktable/disk_table.go +++ b/sei-db/db_engine/litt/disktable/disk_table.go @@ -7,7 +7,6 @@ import ( "fmt" "log/slog" "math" - "math/rand" "os" "path" "sync" @@ -229,12 +228,6 @@ func NewDiskTable( } else { nextSegmentIndex = highestSegmentIndex + 1 } - salt := [16]byte{} - _, err = config.SaltShaker.Read(salt[:]) - if err != nil { - return nil, fmt.Errorf("failed to read salt: %w", err) - } - mutableSegment, err := segment.CreateSegment( config.Logger, errorMonitor, @@ -242,7 +235,6 @@ func NewDiskTable( segmentPaths, snapshottingEnabled, metadata.GetShardingFactor(), - salt, config.Fsync) if err != nil { return nil, fmt.Errorf("failed to create mutable segment: %w", err) @@ -261,8 +253,6 @@ func NewDiskTable( } } - tableSaltShaker := rand.New(rand.NewSource(config.SaltShaker.Int63())) - var upperBoundSnapshotFile *BoundaryFile if config.SnapshotDirectory != "" { // Initialize snapshot files if snapshotting is enabled. 
@@ -307,7 +297,6 @@ func NewDiskTable( clock: config.Clock, segmentPaths: segmentPaths, snapshottingEnabled: snapshottingEnabled, - saltShaker: tableSaltShaker, metadata: metadata, fsync: config.Fsync, metrics: metrics, @@ -635,6 +624,10 @@ func (d *DiskTable) SetShardingFactor(shardingFactor uint32) error { if shardingFactor == 0 { return fmt.Errorf("sharding factor must be greater than 0") } + if shardingFactor > litt.MaxShardingFactor { + return fmt.Errorf("sharding factor must be at most %d, got %d", + litt.MaxShardingFactor, shardingFactor) + } request := &controlLoopSetShardingFactorRequest{ shardingFactor: shardingFactor, diff --git a/sei-db/db_engine/litt/disktable/disk_table_test.go b/sei-db/db_engine/litt/disktable/disk_table_test.go index 749bcfa6f4..ed8960d78d 100644 --- a/sei-db/db_engine/litt/disktable/disk_table_test.go +++ b/sei-db/db_engine/litt/disktable/disk_table_test.go @@ -106,7 +106,6 @@ func buildMemKeyDiskTableSingleShard( config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.GCPeriod = time.Millisecond config.Fsync = false - config.SaltShaker = util.NewTestRandom().Rand config.Logger = logger table, err := NewDiskTable( @@ -153,7 +152,6 @@ func buildMemKeyDiskTableMultiShard( config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.GCPeriod = time.Millisecond config.Fsync = false - config.SaltShaker = util.NewTestRandom().Rand config.ShardingFactor = 4 config.Logger = logger @@ -200,7 +198,6 @@ func buildPebbleDBKeyDiskTableSingleShard( config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.GCPeriod = time.Millisecond config.Fsync = false - config.SaltShaker = util.NewTestRandom().Rand config.Logger = logger table, err := NewDiskTable( @@ -246,7 +243,6 @@ func buildPebbleDBKeyDiskTableMultiShard( config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.GCPeriod = time.Millisecond config.Fsync = false - 
config.SaltShaker = util.NewTestRandom().Rand config.ShardingFactor = 4 config.Logger = logger @@ -1214,10 +1210,9 @@ func truncatedValueFileTest(t *testing.T, tableBuilder *tableBuilder) { // Find a shard that has at least one key in the last segment (truncating an empty file is boring) keysInLastFile, err := segments[highestSegmentIndex].GetKeys() require.NoError(t, err) - diskTable := table.(*DiskTable) nonEmptyShards := make(map[uint32]struct{}) for key := range keysInLastFile { - keyShard := diskTable.controlLoop.segments[highestSegmentIndex].GetShard(keysInLastFile[key].Key) + keyShard := uint32(keysInLastFile[key].Address.ShardID()) nonEmptyShards[keyShard] = struct{}{} } var shard uint32 @@ -1243,7 +1238,7 @@ func truncatedValueFileTest(t *testing.T, tableBuilder *tableBuilder) { // Figure out which keys are expected to be missing missingKeys := make(map[string]struct{}) for _, key := range keysInLastFile { - keyShard := diskTable.controlLoop.segments[diskTable.controlLoop.highestSegmentIndex].GetShard(key.Key) + keyShard := uint32(key.Address.ShardID()) if keyShard != shard { // key does not belong to the shard that was truncated continue diff --git a/sei-db/db_engine/litt/disktable/keymap/keymap_test.go b/sei-db/db_engine/litt/disktable/keymap/keymap_test.go index a81ecdbc73..9cb0911b73 100644 --- a/sei-db/db_engine/litt/disktable/keymap/keymap_test.go +++ b/sei-db/db_engine/litt/disktable/keymap/keymap_test.go @@ -38,6 +38,15 @@ func buildPebbleDBKeymap(logger *slog.Logger, path string) (Keymap, error) { return kmap, nil } +func randomAddress(rand *util.TestRandom) types.Address { + return types.NewAddress( + rand.Uint32(), + rand.Uint32(), + uint8(rand.Uint32Range(0, 256)), + rand.Uint32(), + ) +} + func testBasicBehavior(t *testing.T, keymap Keymap) { rand := util.NewTestRandom() @@ -49,7 +58,7 @@ func testBasicBehavior(t *testing.T, keymap Keymap) { if choice < 0.5 { // Write a random value key := []byte(rand.String(32)) - address := 
types.Address(rand.Uint64()) + address := randomAddress(rand) err := keymap.Put([]*types.ScopedKey{{Key: key, Address: address}}) require.NoError(t, err) @@ -78,7 +87,7 @@ func testBasicBehavior(t *testing.T, keymap Keymap) { pairs := make([]*types.ScopedKey, numberToWrite) for i := 0; i < int(numberToWrite); i++ { key := []byte(rand.String(32)) - address := types.Address(rand.Uint64()) + address := randomAddress(rand) pairs[i] = &types.ScopedKey{Key: key, Address: address} expected[string(key)] = address } @@ -150,7 +159,7 @@ func TestRestart(t *testing.T) { if choice < 0.5 { // Write a random value key := []byte(rand.String(32)) - address := types.Address(rand.Uint64()) + address := randomAddress(rand) err := keymap.Put([]*types.ScopedKey{{Key: key, Address: address}}) require.NoError(t, err) @@ -179,7 +188,7 @@ func TestRestart(t *testing.T) { pairs := make([]*types.ScopedKey, numberToWrite) for i := 0; i < int(numberToWrite); i++ { key := []byte(rand.String(32)) - address := types.Address(rand.Uint64()) + address := randomAddress(rand) pairs[i] = &types.ScopedKey{Key: key, Address: address} expected[string(key)] = address } @@ -225,7 +234,7 @@ func TestRestart(t *testing.T) { if choice < 0.5 { // Write a random value key := []byte(rand.String(32)) - address := types.Address(rand.Uint64()) + address := randomAddress(rand) err := keymap.Put([]*types.ScopedKey{{Key: key, Address: address}}) require.NoError(t, err) @@ -254,7 +263,7 @@ func TestRestart(t *testing.T) { pairs := make([]*types.ScopedKey, numberToWrite) for i := 0; i < int(numberToWrite); i++ { key := []byte(rand.String(32)) - address := types.Address(rand.Uint64()) + address := randomAddress(rand) pairs[i] = &types.ScopedKey{Key: key, Address: address} expected[string(key)] = address } diff --git a/sei-db/db_engine/litt/disktable/keymap/pebble_db_keymap.go b/sei-db/db_engine/litt/disktable/keymap/pebble_db_keymap.go index 222f67848c..40e2bb88cb 100644 --- 
a/sei-db/db_engine/litt/disktable/keymap/pebble_db_keymap.go +++ b/sei-db/db_engine/litt/disktable/keymap/pebble_db_keymap.go @@ -133,19 +133,19 @@ func (p *PebbleDBKeymap) Get(key []byte) (types.Address, bool, error) { val, closer, err := p.db.Get(key) if err != nil { if errors.Is(err, pebble.ErrNotFound) { - return 0, false, nil + return types.Address{}, false, nil } - return 0, false, fmt.Errorf("failed to get key from PebbleDB: %w", err) + return types.Address{}, false, fmt.Errorf("failed to get key from PebbleDB: %w", err) } // Clone the bytes before closing, since the slice is only valid until closer.Close(). cloned := bytes.Clone(val) if cerr := closer.Close(); cerr != nil { - return 0, false, fmt.Errorf("failed to close PebbleDB get closer: %w", cerr) + return types.Address{}, false, fmt.Errorf("failed to close PebbleDB get closer: %w", cerr) } address, err := types.DeserializeAddress(cloned) if err != nil { - return 0, false, fmt.Errorf("failed to deserialize address: %w", err) + return types.Address{}, false, fmt.Errorf("failed to deserialize address: %w", err) } return address, true, nil diff --git a/sei-db/db_engine/litt/disktable/segment/address_test.go b/sei-db/db_engine/litt/disktable/segment/address_test.go index 47ffcfa4cf..bed7a2d576 100644 --- a/sei-db/db_engine/litt/disktable/segment/address_test.go +++ b/sei-db/db_engine/litt/disktable/segment/address_test.go @@ -3,6 +3,8 @@ package segment import ( + "encoding/binary" + "math" "testing" "github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/types" @@ -10,14 +12,241 @@ import ( "github.com/stretchr/testify/require" ) -func TestAddress(t *testing.T) { +// randomAddress builds an Address with each field independently randomized over its full domain. 
+func randomAddress(rand *util.TestRandom) types.Address { + return types.NewAddress( + rand.Uint32(), + rand.Uint32(), + uint8(rand.Uint32Range(0, 256)), + rand.Uint32(), + ) +} + +// assertRoundTrip serializes the input, deserializes the result, and asserts equality in both directions. +func assertRoundTrip(t *testing.T, address types.Address) { + t.Helper() + + serialized := address.Serialize() + require.Len(t, serialized, types.AddressSerializedSize) + + deserialized, err := types.DeserializeAddress(serialized) + require.NoError(t, err) + require.Equal(t, address, deserialized) + + // Going the other direction (bytes -> Address -> bytes) should also be stable. + reserialized := deserialized.Serialize() + require.Equal(t, serialized, reserialized) +} + +func TestAddressGetters(t *testing.T) { t.Parallel() rand := util.NewTestRandom() index := rand.Uint32() offset := rand.Uint32() - address := types.NewAddress(index, offset) + shardID := uint8(rand.Uint32Range(0, 256)) + valueSize := rand.Uint32() + address := types.NewAddress(index, offset, shardID, valueSize) require.Equal(t, index, address.Index()) require.Equal(t, offset, address.Offset()) + require.Equal(t, shardID, address.ShardID()) + require.Equal(t, valueSize, address.ValueSize()) +} + +// TestAddressZeroValueRoundTrip verifies that the zero Address (which is what callers see for keymap misses) +// roundtrips cleanly through serialize/deserialize. 
+func TestAddressZeroValueRoundTrip(t *testing.T) { + t.Parallel() + + zero := types.Address{} + require.Equal(t, uint32(0), zero.Index()) + require.Equal(t, uint32(0), zero.Offset()) + require.Equal(t, uint8(0), zero.ShardID()) + require.Equal(t, uint32(0), zero.ValueSize()) + + serialized := zero.Serialize() + require.Len(t, serialized, types.AddressSerializedSize) + for i, b := range serialized { + require.Equal(t, byte(0), b, "byte %d should be zero", i) + } + + deserialized, err := types.DeserializeAddress(serialized) + require.NoError(t, err) + require.Equal(t, zero, deserialized) +} + +// TestAddressBoundaryRoundTrips covers the corners of the value domain to make sure +// no field-truncation or sign-extension bugs sneak in. +func TestAddressBoundaryRoundTrips(t *testing.T) { + t.Parallel() + + cases := []types.Address{ + types.NewAddress(0, 0, 0, 0), + types.NewAddress(math.MaxUint32, math.MaxUint32, math.MaxUint8, math.MaxUint32), + types.NewAddress(math.MaxUint32, 0, 0, 0), + types.NewAddress(0, math.MaxUint32, 0, 0), + types.NewAddress(0, 0, math.MaxUint8, 0), + types.NewAddress(0, 0, 0, math.MaxUint32), + types.NewAddress(1, 2, 3, 4), + types.NewAddress(math.MaxUint32, 0, math.MaxUint8, 0), + types.NewAddress(0, math.MaxUint32, 0, math.MaxUint32), + } + + for i, addr := range cases { + addr := addr + t.Run("", func(t *testing.T) { + t.Parallel() + assertRoundTrip(t, addr) + require.NotPanicsf(t, func() { _ = addr.String() }, "case %d", i) + }) + } +} + +// TestAddressAllShardIDsRoundTrip exhaustively covers every legal shard ID (0..255) so that we know +// the single byte slot is wired up for every value it can take. 
+func TestAddressAllShardIDsRoundTrip(t *testing.T) { + t.Parallel() + rand := util.NewTestRandom() + + for shard := 0; shard < 256; shard++ { + address := types.NewAddress(rand.Uint32(), rand.Uint32(), uint8(shard), rand.Uint32()) + assertRoundTrip(t, address) + require.Equal(t, uint8(shard), address.ShardID()) + } +} + +// TestAddressRandomRoundTrips fuzzes the round trip with a large batch of independently random addresses. +func TestAddressRandomRoundTrips(t *testing.T) { + t.Parallel() + rand := util.NewTestRandom() + + for i := 0; i < 1000; i++ { + assertRoundTrip(t, randomAddress(rand)) + } +} + +// TestAddressSerializeWireFormat pins down the on-disk byte layout so an accidental change to the wire format +// is caught by tests rather than by silently corrupting persisted data. +func TestAddressSerializeWireFormat(t *testing.T) { + t.Parallel() + + const ( + index uint32 = 0x01020304 + offset uint32 = 0x05060708 + shardID uint8 = 0x09 + valueSize uint32 = 0x0A0B0C0D + ) + + expected := []byte{ + 0x01, 0x02, 0x03, 0x04, // index + 0x05, 0x06, 0x07, 0x08, // offset + 0x09, // shardID + 0x0A, 0x0B, 0x0C, 0x0D, // valueSize + } + require.Len(t, expected, types.AddressSerializedSize) + + address := types.NewAddress(index, offset, shardID, valueSize) + require.Equal(t, expected, address.Serialize()) + + deserialized, err := types.DeserializeAddress(expected) + require.NoError(t, err) + require.Equal(t, address, deserialized) +} + +// TestAddressDeserializeSerializeRoundTrip confirms that arbitrary 13-byte buffers are stable when +// fed through deserialize → serialize. 
+func TestAddressDeserializeSerializeRoundTrip(t *testing.T) { + t.Parallel() + rand := util.NewTestRandom() + + for i := 0; i < 1000; i++ { + raw := rand.Bytes(types.AddressSerializedSize) + + address, err := types.DeserializeAddress(raw) + require.NoError(t, err) + + require.Equal(t, raw, address.Serialize()) + + // Sanity: every byte is reachable through one of the getters and big-endian decoding. + require.Equal(t, binary.BigEndian.Uint32(raw[0:4]), address.Index()) + require.Equal(t, binary.BigEndian.Uint32(raw[4:8]), address.Offset()) + require.Equal(t, raw[8], address.ShardID()) + require.Equal(t, binary.BigEndian.Uint32(raw[9:13]), address.ValueSize()) + } +} + +// TestAddressSerializeReturnsFreshBuffer guards against a future "optimization" that returns a shared +// underlying array, which would silently cause callers (e.g. the key file writer) to see corrupted data +// if they retain the slice across calls. +func TestAddressSerializeReturnsFreshBuffer(t *testing.T) { + t.Parallel() + rand := util.NewTestRandom() + + address := randomAddress(rand) + + first := address.Serialize() + second := address.Serialize() + require.Equal(t, first, second) + + // Mutating the first slice must not affect a subsequent serialization, nor the Address itself. + original := append([]byte{}, first...) + for i := range first { + first[i] ^= 0xFF + } + + require.Equal(t, original, second, "second serialization should not share memory with the first") + require.Equal(t, original, address.Serialize(), "third serialization should match the original bytes") +} + +// TestAddressDeserializeIsIndependentOfInput guards against deserialize aliasing the caller-owned input +// slice. Mutating the source bytes after Deserialize returns must not perturb the resulting Address. 
+func TestAddressDeserializeIsIndependentOfInput(t *testing.T) { + t.Parallel() + rand := util.NewTestRandom() + + source := rand.Bytes(types.AddressSerializedSize) + address, err := types.DeserializeAddress(source) + require.NoError(t, err) + + expected := address + for i := range source { + source[i] ^= 0xFF + } + + require.Equal(t, expected, address) +} + +// TestDeserializeAddressLengthError checks that DeserializeAddress rejects every length that is not exactly +// AddressSerializedSize, including the legacy 8-byte address length. +func TestDeserializeAddressLengthError(t *testing.T) { + t.Parallel() + + badLengths := []int{ + 0, + 1, + 8, // the pre-refactor uint64 length, included as a regression guard + types.AddressSerializedSize - 1, + types.AddressSerializedSize + 1, + 32, + 1024, + } + + for _, badLength := range badLengths { + _, err := types.DeserializeAddress(make([]byte, badLength)) + require.Errorf(t, err, "expected error for length %d", badLength) + } +} + +// TestAddressString provides smoke coverage for the String formatter so that the human-readable form +// is at least guaranteed to mention every field. 
+func TestAddressString(t *testing.T) { + t.Parallel() + + address := types.NewAddress(11, 22, 33, 44) + s := address.String() + + for _, want := range []string{"11", "22", "33", "44"} { + require.Contains(t, s, want, "String() = %q should contain %q", s, want) + } } diff --git a/sei-db/db_engine/litt/disktable/segment/key_file.go b/sei-db/db_engine/litt/disktable/segment/key_file.go index 482b51617c..e77160e362 100644 --- a/sei-db/db_engine/litt/disktable/segment/key_file.go +++ b/sei-db/db_engine/litt/disktable/segment/key_file.go @@ -63,7 +63,7 @@ func createKeyFile( logger: logger, index: index, segmentPath: segmentPath, - segmentVersion: ValueSizeSegmentVersion, + segmentVersion: LatestSegmentVersion, swap: swap, } @@ -189,23 +189,16 @@ func (k *keyFile) write(scopedKey *types.ScopedKey) error { return fmt.Errorf("failed to write key to key file: %w", err) } - // Write the address. - err = binary.Write(k.writer, binary.BigEndian, scopedKey.Address) + // Write the serialized address (which includes the shard ID and value size). + _, err = k.writer.Write(scopedKey.Address.Serialize()) if err != nil { return fmt.Errorf("failed to write address to key file: %w", err) } - // Write the size of the value. - err = binary.Write(k.writer, binary.BigEndian, scopedKey.ValueSize) - if err != nil { - return fmt.Errorf("failed to write value size to key file: %w", err) - } - k.size += uint64( 4 /* uint32 size of key */ + len(scopedKey.Key) + - 8 /* uint64 address */ + - 4 /* uint32 size of value */) + types.AddressSerializedSize) return nil } @@ -284,36 +277,24 @@ func (k *keyFile) readKeys() ([]*types.ScopedKey, error) { keyLength := int(binary.BigEndian.Uint32(keyBytes[index : index+4])) index += 4 - if k.segmentVersion < ValueSizeSegmentVersion { - // We need to read the key, as well as the 8 byte address. - if index+keyLength+8 > len(keyBytes) { - // There are insufficient bytes left in the file to read the key and address. 
- break - } - } else { - // We need to read the key, as well as the 8 byte address and 4 byte value size. - if index+keyLength+12 > len(keyBytes) { - // There are insufficient bytes left in the file to read the key, address, and value size. - break - } + // We need to read the key, as well as the serialized address (which embeds the shard ID and value size). + if index+keyLength+types.AddressSerializedSize > len(keyBytes) { + // There are insufficient bytes left in the file to read the key and address. + break } key := keyBytes[index : index+keyLength] index += keyLength - address := types.Address(binary.BigEndian.Uint64(keyBytes[index : index+8])) - index += 8 - - var valueSize uint32 - if k.segmentVersion >= ValueSizeSegmentVersion { - valueSize = binary.BigEndian.Uint32(keyBytes[index : index+4]) - index += 4 + address, err := types.DeserializeAddress(keyBytes[index : index+types.AddressSerializedSize]) + if err != nil { + return nil, fmt.Errorf("failed to deserialize address: %w", err) } + index += types.AddressSerializedSize keys = append(keys, &types.ScopedKey{ - Key: key, - Address: address, - ValueSize: valueSize, + Key: key, + Address: address, }) } diff --git a/sei-db/db_engine/litt/disktable/segment/key_file_test.go b/sei-db/db_engine/litt/disktable/segment/key_file_test.go index 3a08604f5e..7c78a3b16f 100644 --- a/sei-db/db_engine/litt/disktable/segment/key_file_test.go +++ b/sei-db/db_engine/litt/disktable/segment/key_file_test.go @@ -25,9 +25,13 @@ func TestReadWriteKeys(t *testing.T) { keys := make([]*types.ScopedKey, keyCount) for i := 0; i < int(keyCount); i++ { key := rand.VariableBytes(1, 100) - address := types.Address(rand.Uint64()) - valueSize := rand.Uint32() - keys[i] = &types.ScopedKey{Key: key, Address: address, ValueSize: valueSize} + address := types.NewAddress( + rand.Uint32(), + rand.Uint32(), + uint8(rand.Uint32Range(0, 256)), + rand.Uint32(), + ) + keys[i] = &types.ScopedKey{Key: key, Address: address} } segmentPath, err := 
NewSegmentPath(directory, "", "table") @@ -65,7 +69,7 @@ func TestReadWriteKeys(t *testing.T) { } // Create a new in-memory instance from the on-disk file and verify that it behaves the same. - file2, err := loadKeyFile(logger, index, []*SegmentPath{segmentPath}, ValueSizeSegmentVersion) + file2, err := loadKeyFile(logger, index, []*SegmentPath{segmentPath}, LatestSegmentVersion) require.NoError(t, err) require.Equal(t, file.Size(), file2.Size()) @@ -99,9 +103,13 @@ func TestReadingTruncatedKeyFile(t *testing.T) { keys := make([]*types.ScopedKey, keyCount) for i := 0; i < int(keyCount); i++ { key := rand.VariableBytes(1, 100) - address := types.Address(rand.Uint64()) - valueSize := rand.Uint32() - keys[i] = &types.ScopedKey{Key: key, Address: address, ValueSize: valueSize} + address := types.NewAddress( + rand.Uint32(), + rand.Uint32(), + uint8(rand.Uint32Range(0, 256)), + rand.Uint32(), + ) + keys[i] = &types.ScopedKey{Key: key, Address: address} } segmentPath, err := NewSegmentPath(directory, "", "table") @@ -181,9 +189,13 @@ func TestSwappingKeyFile(t *testing.T) { keys := make([]*types.ScopedKey, keyCount) for i := 0; i < int(keyCount); i++ { key := rand.VariableBytes(1, 100) - address := types.Address(rand.Uint64()) - valueSize := rand.Uint32() - keys[i] = &types.ScopedKey{Key: key, Address: address, ValueSize: valueSize} + address := types.NewAddress( + rand.Uint32(), + rand.Uint32(), + uint8(rand.Uint32Range(0, 256)), + rand.Uint32(), + ) + keys[i] = &types.ScopedKey{Key: key, Address: address} } segmentPath, err := NewSegmentPath(directory, "", "table") @@ -221,7 +233,7 @@ func TestSwappingKeyFile(t *testing.T) { } // Create a new in-memory instance from the on-disk file and verify that it behaves the same. 
- file2, err := loadKeyFile(logger, index, []*SegmentPath{segmentPath}, ValueSizeSegmentVersion) + file2, err := loadKeyFile(logger, index, []*SegmentPath{segmentPath}, LatestSegmentVersion) require.NoError(t, err) require.Equal(t, file.Size(), file2.Size()) @@ -271,7 +283,7 @@ func TestSwappingKeyFile(t *testing.T) { require.Equal(t, actualSize, reportedSize) // Verify the contents of the new file. Reload it from disk just to ensure that we aren't "cheating" somehow. - file2, err = loadKeyFile(logger, index, []*SegmentPath{segmentPath}, ValueSizeSegmentVersion) + file2, err = loadKeyFile(logger, index, []*SegmentPath{segmentPath}, LatestSegmentVersion) require.NoError(t, err) readKeys, err = file2.readKeys() require.NoError(t, err) diff --git a/sei-db/db_engine/litt/disktable/segment/metadata_file.go b/sei-db/db_engine/litt/disktable/segment/metadata_file.go index 15ee163170..0aca7e3eee 100644 --- a/sei-db/db_engine/litt/disktable/segment/metadata_file.go +++ b/sei-db/db_engine/litt/disktable/segment/metadata_file.go @@ -24,33 +24,14 @@ const ( // deleted. MetadataSwapExtension = MetadataFileExtension + util.SwapFileExtension - // V0MetadataSize is the size the metadata file at version 0 (aka OldHashFunctionSegmentVersion) - // This is a constant, so it's convenient to have it here. - // - 4 bytes for version - // - 4 bytes for the sharding factor - // - 4 bytes for salt - // - 8 bytes for lastValueTimestamp - // - and 1 byte for sealed. - V0MetadataSize = 21 - - // V1MetadataSize is the size of the metadata file at version 1 (aka SipHashSegmentVersion). - // This is a constant, so it's convenient to have it here. - // - 4 bytes for version - // - 4 bytes for the sharding factor - // - 16 bytes for salt - // - 8 bytes for lastValueTimestamp - // - and 1 byte for sealed. - V1MetadataSize = 33 - - // V2MetadataSize is the size of the metadata file at version 2 (aka ValueSizeSegmentVersion). - // This is a constant, so it's convenient to have it here. 
- // - 4 bytes for version - // - 4 bytes for the sharding factor - // - 16 bytes for salt - // - 8 bytes for lastValueTimestamp - // - 4 bytes for keyCount - // - and 1 byte for sealed. - V2MetadataSize = 37 + // V3MetadataSize is the size of the metadata file at LatestSegmentVersion (ShardedAddressSegmentVersion). + // Layout: + // - 4 bytes for version + // - 4 bytes for the sharding factor + // - 8 bytes for lastValueTimestamp + // - 4 bytes for keyCount + // - 1 byte for sealed + V3MetadataSize = 21 ) // metadataFile contains metadata about a segment. This file contains metadata about the data segment, such as @@ -66,15 +47,6 @@ type metadataFile struct { // The sharding factor for this segment. This value is encoded in the file. shardingFactor uint32 - // A random number, used to make the sharding hash function hard for an attacker to predict. - // This value is encoded in the file. Note: after the hash function change, this value is - // only used for data written with the old hash function. - legacySalt uint32 - - // A random byte array, used to make the sharding hash function hard for an attacker to predict. - // This value is encoded in the file. - salt [16]byte - // The time when the last value was written into the segment, in nanoseconds since the epoch. A segment can // only be deleted when all values within it are expired, and so we only need to keep track of the // lastValueTimestamp of the last value (which always expires last). 
This value is irrelevant if the segment is @@ -102,7 +74,6 @@ type metadataFile struct { func createMetadataFile( index uint32, shardingFactor uint32, - salt [16]byte, path *SegmentPath, fsync bool, ) (*metadataFile, error) { @@ -115,7 +86,6 @@ func createMetadataFile( file.segmentVersion = LatestSegmentVersion file.shardingFactor = shardingFactor - file.salt = salt err := file.write() if err != nil { return nil, fmt.Errorf("failed to write metadata file: %v", err) @@ -170,14 +140,7 @@ func getMetadataFileIndex(fileName string) (uint32, error) { // Size returns the size of the metadata file in bytes. func (m *metadataFile) Size() uint64 { - switch m.segmentVersion { - case OldHashFunctionSegmentVersion: - return V0MetadataSize - case SipHashSegmentVersion: - return V1MetadataSize - default: - return V2MetadataSize - } + return V3MetadataSize } // Name returns the file name for this metadata file. @@ -203,119 +166,23 @@ func (m *metadataFile) seal(now time.Time, keyCount uint32) error { return nil } -func (m *metadataFile) serializeV0Legacy() []byte { - data := make([]byte, V0MetadataSize) - - // Write the version - binary.BigEndian.PutUint32(data[0:4], uint32(m.segmentVersion)) - - // Write the sharding factor - binary.BigEndian.PutUint32(data[4:8], m.shardingFactor) - - // Write the salt - binary.BigEndian.PutUint32(data[8:12], m.legacySalt) - - // Write the lastValueTimestamp - binary.BigEndian.PutUint64(data[12:20], m.lastValueTimestamp) - - // Write the sealed flag - if m.sealed { - data[20] = 1 - } else { - data[20] = 0 - } - - return data -} - -func (m *metadataFile) serializeV1Legacy() []byte { - data := make([]byte, V1MetadataSize) - - // Write the version - binary.BigEndian.PutUint32(data[0:4], uint32(m.segmentVersion)) - - // Write the sharding factor - binary.BigEndian.PutUint32(data[4:8], m.shardingFactor) - - // Write the salt - copy(data[8:24], m.salt[:]) - - // Write the lastValueTimestamp - binary.BigEndian.PutUint64(data[24:32], 
m.lastValueTimestamp) - - // Write the sealed flag - if m.sealed { - data[32] = 1 - } else { - data[32] = 0 - } - - return data -} - // serialize serializes the metadata file to a byte array. func (m *metadataFile) serialize() []byte { - if m.segmentVersion == OldHashFunctionSegmentVersion { - return m.serializeV0Legacy() - } else if m.segmentVersion == SipHashSegmentVersion { - return m.serializeV1Legacy() - } + data := make([]byte, V3MetadataSize) - data := make([]byte, V2MetadataSize) - - // Write the version binary.BigEndian.PutUint32(data[0:4], uint32(m.segmentVersion)) - - // Write the sharding factor binary.BigEndian.PutUint32(data[4:8], m.shardingFactor) - - // Write the salt - copy(data[8:24], m.salt[:]) - - // Write the lastValueTimestamp - binary.BigEndian.PutUint64(data[24:32], m.lastValueTimestamp) - - // Write the key count - binary.BigEndian.PutUint32(data[32:36], m.keyCount) - - // Write the sealed flag + binary.BigEndian.PutUint64(data[8:16], m.lastValueTimestamp) + binary.BigEndian.PutUint32(data[16:20], m.keyCount) if m.sealed { - data[36] = 1 + data[20] = 1 } else { - data[36] = 0 + data[20] = 0 } return data } -func (m *metadataFile) deserializeV0Legacy(data []byte) error { - // TODO (cody.littley): delete this after all data is migrated - if len(data) != V0MetadataSize { - return fmt.Errorf("metadata file is not the correct size, expected %d, got %d", - V0MetadataSize, len(data)) - } - - m.shardingFactor = binary.BigEndian.Uint32(data[4:8]) - m.legacySalt = binary.BigEndian.Uint32(data[8:12]) - m.lastValueTimestamp = binary.BigEndian.Uint64(data[12:20]) - m.sealed = data[20] == 1 - return nil -} - -func (m *metadataFile) deserializeV1Legacy(data []byte) error { - // TODO (cody.littley): delete this after all data is migrated - if len(data) != V1MetadataSize { - return fmt.Errorf("metadata file is not the correct size, expected %d, got %d", - V1MetadataSize, len(data)) - } - - m.shardingFactor = binary.BigEndian.Uint32(data[4:8]) - m.salt = 
[16]byte(data[8:24]) - m.lastValueTimestamp = binary.BigEndian.Uint64(data[24:32]) - m.sealed = data[32] == 1 - return nil -} - // deserialize deserializes the metadata file from a byte array. func (m *metadataFile) deserialize(data []byte) error { if len(data) < 4 { @@ -323,26 +190,20 @@ func (m *metadataFile) deserialize(data []byte) error { } m.segmentVersion = SegmentVersion(binary.BigEndian.Uint32(data[0:4])) - if m.segmentVersion > LatestSegmentVersion { - return fmt.Errorf("unsupported serialization version: %d", m.segmentVersion) + if m.segmentVersion != LatestSegmentVersion { + return fmt.Errorf("unsupported segment version: %d (only version %d is supported)", + m.segmentVersion, LatestSegmentVersion) } - if m.segmentVersion == OldHashFunctionSegmentVersion { - return m.deserializeV0Legacy(data) - } else if m.segmentVersion == SipHashSegmentVersion { - return m.deserializeV1Legacy(data) - } - - if len(data) != V2MetadataSize { + if len(data) != V3MetadataSize { return fmt.Errorf("metadata file is not the correct size, expected %d, got %d", - V2MetadataSize, len(data)) + V3MetadataSize, len(data)) } m.shardingFactor = binary.BigEndian.Uint32(data[4:8]) - m.salt = [16]byte(data[8:24]) - m.lastValueTimestamp = binary.BigEndian.Uint64(data[24:32]) - m.keyCount = binary.BigEndian.Uint32(data[32:36]) - m.sealed = data[36] == 1 + m.lastValueTimestamp = binary.BigEndian.Uint64(data[8:16]) + m.keyCount = binary.BigEndian.Uint32(data[16:20]) + m.sealed = data[20] == 1 return nil } diff --git a/sei-db/db_engine/litt/disktable/segment/metadata_file_test.go b/sei-db/db_engine/litt/disktable/segment/metadata_file_test.go index ec7e62d27d..897a89e9cc 100644 --- a/sei-db/db_engine/litt/disktable/segment/metadata_file_test.go +++ b/sei-db/db_engine/litt/disktable/segment/metadata_file_test.go @@ -17,7 +17,6 @@ func TestUnsealedSerialization(t *testing.T) { index := rand.Uint32() shardingFactor := rand.Uint32() - salt := ([16]byte)(rand.Bytes(16)) timestamp := rand.Uint64() 
segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) @@ -27,7 +26,6 @@ func TestUnsealedSerialization(t *testing.T) { index: index, segmentVersion: LatestSegmentVersion, shardingFactor: shardingFactor, - salt: salt, lastValueTimestamp: timestamp, sealed: false, segmentPath: segmentPath, @@ -64,7 +62,6 @@ func TestSealedSerialization(t *testing.T) { index := rand.Uint32() shardingFactor := rand.Uint32() - salt := ([16]byte)(rand.Bytes(16)) timestamp := rand.Uint64() segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) @@ -74,7 +71,6 @@ func TestSealedSerialization(t *testing.T) { index: index, segmentVersion: LatestSegmentVersion, shardingFactor: shardingFactor, - salt: salt, lastValueTimestamp: timestamp, sealed: true, segmentPath: segmentPath, @@ -109,14 +105,12 @@ func TestFreshFileSerialization(t *testing.T) { rand := util.NewTestRandom() directory := t.TempDir() - salt := ([16]byte)(rand.Bytes(16)) - index := rand.Uint32() segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) err = segmentPath.MakeDirectories(false) require.NoError(t, err) - m, err := createMetadataFile(index, 1234, salt, segmentPath, false) + m, err := createMetadataFile(index, 1234, segmentPath, false) require.NoError(t, err) require.Equal(t, index, m.index) @@ -151,14 +145,12 @@ func TestSealing(t *testing.T) { rand := util.NewTestRandom() directory := t.TempDir() - salt := ([16]byte)(rand.Bytes(16)) - index := rand.Uint32() segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) err = segmentPath.MakeDirectories(false) require.NoError(t, err) - m, err := createMetadataFile(index, 1234, salt, segmentPath, false) + m, err := createMetadataFile(index, 1234, segmentPath, false) require.NoError(t, err) // seal the file @@ -170,7 +162,6 @@ func TestSealing(t *testing.T) { require.Equal(t, LatestSegmentVersion, m.segmentVersion) require.True(t, m.sealed) require.Equal(t, 
 uint64(sealTime.UnixNano()), m.lastValueTimestamp) - require.Equal(t, salt, m.salt) require.Equal(t, uint32(1234), m.shardingFactor) require.Equal(t, uint32(987), m.keyCount) diff --git a/sei-db/db_engine/litt/disktable/segment/segment.go b/sei-db/db_engine/litt/disktable/segment/segment.go index 9a0ddfba2f..fd41da9ffc 100644 --- a/sei-db/db_engine/litt/disktable/segment/segment.go +++ b/sei-db/db_engine/litt/disktable/segment/segment.go @@ -94,6 +94,16 @@ type Segment struct { // If true, then sync the file system for atomic operations. Should always be true in production, but can // be set to false for tests to save time. fsync bool + + // nextShard is the shard index that will receive the next value written to this segment. After each Write, + // it is incremented modulo metadata.shardingFactor, producing a perfectly even round-robin distribution of + // values across shards regardless of the keys being written. This counter is not persisted or reconstructed + // on load: a mutable segment reopened after a restart resumes at shard 0, which is safe for correctness + // because each value's shard is recorded in its Address (only the distribution may skew slightly). + // + // Write is only ever invoked from the disk_table control loop, which is single-threaded with respect to + // any given segment, so we do not guard nextShard with atomics or a lock. + nextShard uint32 } // CreateSegment creates a new data segment. 
@@ -104,14 +114,13 @@ func CreateSegment( segmentPaths []*SegmentPath, snapshottingEnabled bool, shardingFactor uint32, - salt [16]byte, fsync bool) (*Segment, error) { if len(segmentPaths) == 0 { return nil, errors.New("no segment paths provided") } - metadata, err := createMetadataFile(index, shardingFactor, salt, segmentPaths[0], fsync) + metadata, err := createMetadataFile(index, shardingFactor, segmentPaths[0], fsync) if err != nil { return nil, fmt.Errorf("failed to open metadata file: %v", err) } @@ -266,11 +275,11 @@ func (s *Segment) sealLoadedSegment(now time.Time) error { badKeys := make([]*types.ScopedKey, 0, len(scopedKeys)) for _, scopedKey := range scopedKeys { - shard := s.GetShard(scopedKey.Key) + shard := scopedKey.Address.ShardID() requiredValueFileLength := uint64(scopedKey.Address.Offset()) + 4 /* value size uint32 */ + - uint64(scopedKey.ValueSize) + uint64(scopedKey.Address.ValueSize()) if s.shards[shard].Size() < requiredValueFileLength { badKeys = append(badKeys, scopedKey) @@ -378,22 +387,6 @@ func (s *Segment) SetNextSegment(nextSegment *Segment) { s.nextSegment = nextSegment } -// GetShard returns the shard number for a key. -func (s *Segment) GetShard(key []byte) uint32 { - if s.metadata.shardingFactor == 1 { - // Shortcut: if we have one shard, we don't need to hash the key to figure out the mapping. - return 0 - } - - if s.metadata.segmentVersion == OldHashFunctionSegmentVersion { - return util.LegacyHashKey(key, s.metadata.legacySalt) % s.metadata.shardingFactor - } - - hash := util.HashKey(key, s.metadata.salt) - - return hash % s.metadata.shardingFactor -} - // Write records a key-value pair in the data segment, returning the maximum size of all shards within this segment. 
// // This method does not ensure that the key-value pair is actually written to disk, only that it will eventually be @@ -403,7 +396,14 @@ func (s *Segment) Write(data *types.KVPair) (keyCount uint32, keyFileSize uint64 return 0, 0, fmt.Errorf("segment is sealed, cannot write data") } - shard := s.GetShard(data.Key) + // Shard assignment is round-robin: each successive call deposits the value into the next shard, wrapping around + // after metadata.shardingFactor calls. This is safe to do without locking because Write is invoked exclusively + // from the disk_table control loop goroutine. + shard := s.nextShard + s.nextShard++ + if s.nextShard == s.metadata.shardingFactor { + s.nextShard = 0 + } currentSize := s.shardSizes[shard] if currentSize > math.MaxUint32 { @@ -421,7 +421,7 @@ func (s *Segment) Write(data *types.KVPair) (keyCount uint32, keyFileSize uint64 s.maxShardSize = s.shardSizes[shard] } s.keyCount++ - s.keyFileSize += uint64(len(data.Key)) + 4 /* uint32 length */ + 8 /* uint64 Address */ + 4 /* uint32 ValueSize */ + s.keyFileSize += uint64(len(data.Key)) + 4 /* uint32 length */ + types.AddressSerializedSize // Forward the value to the shard control loop, which asynchronously writes it to the value file. shardRequest := &valueToWrite{ @@ -436,9 +436,8 @@ func (s *Segment) Write(data *types.KVPair) (keyCount uint32, keyFileSize uint64 // Forward the value to the key and its address file control loop, which asynchronously writes it to the key file. keyRequest := &types.ScopedKey{ - Key: data.Key, - Address: types.NewAddress(s.index, firstByteIndex), - ValueSize: uint32(len(data.Value)), + Key: data.Key, + Address: types.NewAddress(s.index, firstByteIndex, uint8(shard), uint32(len(data.Value))), } err = util.Send(s.errorMonitor, s.keyFileChannel, keyRequest) @@ -459,8 +458,7 @@ func (s *Segment) GetMaxShardSize() uint64 { // // It is only thread safe to read from a segment if the key being read has previously been flushed to disk. 
func (s *Segment) Read(key []byte, dataAddress types.Address) ([]byte, error) { - shard := s.GetShard(key) - values := s.shards[shard] + values := s.shards[dataAddress.ShardID()] value, err := values.read(dataAddress.Offset()) if err != nil { diff --git a/sei-db/db_engine/litt/disktable/segment/segment_test.go b/sei-db/db_engine/litt/disktable/segment/segment_test.go index 30005a821b..c5ff479832 100644 --- a/sei-db/db_engine/litt/disktable/segment/segment_test.go +++ b/sei-db/db_engine/litt/disktable/segment/segment_test.go @@ -48,7 +48,6 @@ func TestWriteAndReadSegmentSingleShard(t *testing.T) { expectedLargestShardSize := uint64(0) - salt := ([16]byte)(rand.Bytes(16)) segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) err = segmentPath.MakeDirectories(false) @@ -60,7 +59,6 @@ func TestWriteAndReadSegmentSingleShard(t *testing.T) { []*SegmentPath{segmentPath}, false, 1, - salt, false) require.NoError(t, err) @@ -199,7 +197,6 @@ func TestWriteAndReadSegmentMultiShard(t *testing.T) { // a map from keys to addresses addressMap := make(map[string]types.Address) - salt := ([16]byte)(rand.Bytes(16)) segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) err = segmentPath.MakeDirectories(false) @@ -211,7 +208,6 @@ func TestWriteAndReadSegmentMultiShard(t *testing.T) { []*SegmentPath{segmentPath}, false, shardCount, - salt, false) require.NoError(t, err) @@ -359,7 +355,6 @@ func TestWriteAndReadColdShard(t *testing.T) { // a map from keys to addresses addressMap := make(map[string]types.Address) - salt := ([16]byte)(rand.Bytes(16)) segmentPath, err := NewSegmentPath(directory, "", "table") require.NoError(t, err) err = segmentPath.MakeDirectories(false) @@ -371,7 +366,6 @@ func TestWriteAndReadColdShard(t *testing.T) { []*SegmentPath{segmentPath}, false, shardCount, - salt, false) require.NoError(t, err) @@ -467,7 +461,6 @@ func TestGetFilePaths(t *testing.T) { index := rand.Uint32() shardingFactor := 
rand.Uint32Range(1, 10) - salt := make([]byte, 16) segmentPath, err := NewSegmentPath(t.TempDir(), "", "table") require.NoError(t, err) @@ -482,7 +475,6 @@ func TestGetFilePaths(t *testing.T) { []*SegmentPath{segmentPath}, false, shardingFactor, - ([16]byte)(salt), false) require.NoError(t, err) @@ -522,3 +514,71 @@ func TestGetFilePaths(t *testing.T) { require.Equal(t, segment.shards[i].path(), valueFiles[i]) } } + +// TestRoundRobinShardAssignment writes exactly `valuesPerShard * shardingFactor` keys to a segment and verifies +// that each shard received exactly `valuesPerShard` of them, in round-robin insertion order. This is the core +// guarantee of the round-robin shard assignment scheme: it does not rely on the contents of the keys at all. +func TestRoundRobinShardAssignment(t *testing.T) { + t.Parallel() + + ctx := t.Context() + rand := util.NewTestRandom() + logger := slog.Default() + directory := t.TempDir() + + const shardingFactor uint32 = 7 + const valuesPerShard = 13 + const valueCount = int(shardingFactor) * valuesPerShard + + segmentPath, err := NewSegmentPath(directory, "", "table") + require.NoError(t, err) + err = segmentPath.MakeDirectories(false) + require.NoError(t, err) + + seg, err := CreateSegment( + logger, + util.NewErrorMonitor(ctx, logger, nil), + rand.Uint32(), + []*SegmentPath{segmentPath}, + false, + shardingFactor, + false) + require.NoError(t, err) + + // Capture the shard ID that the segment assigns to each write, in insertion order. + insertionOrderShards := make([]uint8, 0, valueCount) + + for i := 0; i < valueCount; i++ { + key := rand.PrintableVariableBytes(8, 32) + value := rand.PrintableVariableBytes(8, 32) + _, _, err := seg.Write(&types.KVPair{Key: key, Value: value}) + require.NoError(t, err) + + flushFn, err := seg.Flush() + require.NoError(t, err) + flushed, err := flushFn() + require.NoError(t, err) + // Each iteration above should produce exactly one new flushed key (the one we just wrote). 
+ require.Len(t, flushed, 1) + insertionOrderShards = append(insertionOrderShards, flushed[0].Address.ShardID()) + } + + // The i-th key written should land in shard (i % shardingFactor). + for i, gotShard := range insertionOrderShards { + expectedShard := uint8(uint32(i) % shardingFactor) + require.Equal(t, expectedShard, gotShard, + "value at insertion index %d landed in shard %d, expected shard %d", + i, gotShard, expectedShard) + } + + // And each shard should have received exactly valuesPerShard values. + perShardCounts := make(map[uint8]int) + for _, s := range insertionOrderShards { + perShardCounts[s]++ + } + require.Len(t, perShardCounts, int(shardingFactor)) + for s := uint8(0); s < uint8(shardingFactor); s++ { + require.Equal(t, valuesPerShard, perShardCounts[s], + "shard %d received %d values, expected %d", s, perShardCounts[s], valuesPerShard) + } +} diff --git a/sei-db/db_engine/litt/disktable/segment/segment_version.go b/sei-db/db_engine/litt/disktable/segment/segment_version.go index b172effb37..a7cc21ecbb 100644 --- a/sei-db/db_engine/litt/disktable/segment/segment_version.go +++ b/sei-db/db_engine/litt/disktable/segment/segment_version.go @@ -4,19 +4,21 @@ package segment // SegmentVersion is used to indicate the serialization version of a segment. Whenever serialization formats change // in segment files, this version should be incremented. +// +// Versions 0, 1, and 2 are no longer supported and have been removed from the codebase. The current code can only +// read and write segments at LatestSegmentVersion. The constant numbers below are kept implicitly (no constant is +// declared for them) so that LatestSegmentVersion still increases monotonically as a historical record. type SegmentVersion uint32 const ( - // OldHashFunctionSegmentVersion is the serialization version for the old hash function. 
- OldHashFunctionSegmentVersion SegmentVersion = 0 - - // SipHashSegmentVersion is the version when the siphash hash function was introduced for sharding. - SipHashSegmentVersion SegmentVersion = 1 - - // ValueSizeSegmentVersion adds the length of values to the key file. Previously, only the key and the address were - // stored in the key file. It also adds the key count to the segment metadata file. - ValueSizeSegmentVersion SegmentVersion = 2 + // ShardedAddressSegmentVersion is the on-disk format that: + // - Replaces the legacy 8-byte address + separate value size in the key file with the 13-byte sharded + // Address layout (index, offset, shardID, valueSize). The keymap stores the same layout. + // - Drops the per-segment hashing salt from the metadata file. Shards are assigned to values in + // round-robin order at write time, which makes the key->shard mapping independent of key contents + // (so a caller cannot steer values to a chosen shard) without needing a hash function or any randomness. + ShardedAddressSegmentVersion SegmentVersion = 3 ) // LatestSegmentVersion always refers to the latest version of the segment serialization format. -const LatestSegmentVersion = ValueSizeSegmentVersion +const LatestSegmentVersion = ShardedAddressSegmentVersion diff --git a/sei-db/db_engine/litt/docs/architecture.md b/sei-db/db_engine/litt/docs/architecture.md index a769670e48..45424bae54 100644 --- a/sei-db/db_engine/litt/docs/architecture.md +++ b/sei-db/db_engine/litt/docs/architecture.md @@ -190,20 +190,20 @@ spread those pieces across multiple locations. Key files and metadata files are small. For the sake of simplicity, let's not bother sharding those. Value files are big. Break apart value files, and have one value file per shard. -When writing data, the first thing to do will be to figure out which shard the data belongs in. Do this by taking a -hash of the key modulo the number of shards. +When writing data, the first thing to do will be to figure out which shard the data belongs in. 
We assign shards in +round-robin order: the first write into a fresh segment goes to shard 0, the second to shard 1, and so on, wrapping +around once we have used every shard. -When reading data, we need to do the reverse. Take a hash of the key modulo the number of shards to figure out which -shard to look in. As a consequence, the address alone is no longer enough information to find the data. We also need -to know the key when looking up data. But this isn't a problem, since we always have access to the key when we are -looking up data. +When reading data, we need to know which shard the value lives in. To avoid having to recompute or look anything up, +the address itself records the shard ID alongside the offset. So the address is a self-contained pointer to the +value: given just the address, we know exactly which file to open and where in that file to seek. -From a security perspective, sharding with a predictable hash is dangerous. An attacker could, in theory, craft keys -that all map to the same shard, causing a hot spot in the database. To prevent this, the database chooses a random -"salt" value that it includes in the hash function. As long as an attacker does not know the salt value, they cannot -predict which shard a key will map to. +Round-robin assignment has a nice security side effect for free. A predictable key-to-shard hash is dangerous because +an attacker could craft keys that all map to the same shard and create a hot spot. With round-robin, the shard chosen +for a given value depends only on the order in which it was written, not on the contents of its key, so this attack +is not even expressible. -We already have a metadata file for each segment. We can go ahead and save the sharding factor and salt in the metadata +We already have a metadata file for each segment. We can go ahead and save the sharding factor in the metadata file. This will give us enough information to find data contained within the segment. 
## Iteration 9: Multi-table support diff --git a/sei-db/db_engine/litt/go.mod b/sei-db/db_engine/litt/go.mod index 5d17a909b7..669427e11a 100644 --- a/sei-db/db_engine/litt/go.mod +++ b/sei-db/db_engine/litt/go.mod @@ -33,7 +33,6 @@ go 1.25.6 require ( github.com/cockroachdb/pebble/v2 v2.1.3 - github.com/dchest/siphash v1.2.3 github.com/docker/docker v28.2.2+incompatible github.com/docker/go-connections v0.5.0 github.com/docker/go-units v0.5.0 diff --git a/sei-db/db_engine/litt/go.sum b/sei-db/db_engine/litt/go.sum index a447a0b859..08fb3195ec 100644 --- a/sei-db/db_engine/litt/go.sum +++ b/sei-db/db_engine/litt/go.sum @@ -45,8 +45,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6N github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dchest/siphash v1.2.3 h1:QXwFc8cFOR2dSa/gE6o/HokBMWtLUaNDVd+22aKHeEA= -github.com/dchest/siphash v1.2.3/go.mod h1:0NvQU092bT0ipiFN++/rXm69QG9tVxLAlQHIXMPAkHc= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= diff --git a/sei-db/db_engine/litt/littbuilder/build_utils.go b/sei-db/db_engine/litt/littbuilder/build_utils.go index 646e6580ec..2ed53b4d93 100644 --- a/sei-db/db_engine/litt/littbuilder/build_utils.go +++ b/sei-db/db_engine/litt/littbuilder/build_utils.go @@ -205,6 +205,10 @@ func buildTable( if config.ShardingFactor < 1 { return nil, fmt.Errorf("sharding factor must be at least 1") } + if config.ShardingFactor > litt.MaxShardingFactor { + return nil, fmt.Errorf("sharding factor must be at most %d, got %d", + litt.MaxShardingFactor, config.ShardingFactor) + } 
kmap, keymapDirectory, keymapTypeFile, requiresReload, err := buildKeymap(config, logger, name) if err != nil { diff --git a/sei-db/db_engine/litt/littdb_config.go b/sei-db/db_engine/litt/littdb_config.go index af3c2b5e73..5b3471b12c 100644 --- a/sei-db/db_engine/litt/littdb_config.go +++ b/sei-db/db_engine/litt/littdb_config.go @@ -7,7 +7,6 @@ import ( "fmt" "log/slog" "math" - "math/rand" "time" "github.com/docker/go-units" @@ -16,6 +15,10 @@ import ( "github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/util" ) +// MaxShardingFactor is the largest legal value for Config.ShardingFactor. The shard ID is encoded as a single byte +// inside the on-disk Address, which limits the number of distinct shards to 2^8 = 256. +const MaxShardingFactor = 256 + // Config is configuration for a litt.DB. type Config struct { // The context for the database. If nil, context.Background() is used. @@ -77,13 +80,9 @@ type Config struct { // have multiple shard files. If the sharding factor is smaller than the number of paths, then some paths may not // always have an actively written shard file. // - // The default is 8. Must be at least 1. + // The default is 8. Must be in the range [1, MaxShardingFactor]. ShardingFactor uint32 - // The random number generator used for generating sharding salts. The default is a standard rand.New() - // seeded by the current time. - SaltShaker *rand.Rand - // The size of the cache for tables that have not had their write cache size set. A write cache is used // to store recently written values for fast access. The default is 0 (no cache). // Cache size is in bytes, and includes the size of both the key and the value. Cache size can be set @@ -180,9 +179,6 @@ func DefaultConfig(paths ...string) (*Config, error) { // DefaultConfigNoPaths returns a Config with default values, and does not require any paths to be provided. // If paths are not set prior to use, then the DB will return an error at startup. 
func DefaultConfigNoPaths() *Config { - seed := time.Now().UnixNano() - saltShaker := rand.New(rand.NewSource(seed)) - return &Config{ CTX: context.Background(), Logger: slog.Default(), @@ -190,7 +186,6 @@ func DefaultConfigNoPaths() *Config { GCPeriod: 5 * time.Minute, GCBatchSize: 10_000, ShardingFactor: 8, - SaltShaker: saltShaker, KeymapType: keymap.PebbleDBKeymapType, ControlChannelSize: 64, TargetSegmentFileSize: math.MaxUint32, @@ -248,6 +243,9 @@ func (c *Config) SanityCheck() error { if c.ShardingFactor == 0 { return fmt.Errorf("sharding factor must be at least 1") } + if c.ShardingFactor > MaxShardingFactor { + return fmt.Errorf("sharding factor must be at most %d, got %d", MaxShardingFactor, c.ShardingFactor) + } if c.ControlChannelSize == 0 { return fmt.Errorf("control channel size must be at least 1") } @@ -263,9 +261,6 @@ func (c *Config) SanityCheck() error { if c.GCPeriod == 0 { return fmt.Errorf("gc period must be at least 1") } - if c.SaltShaker == nil { - return fmt.Errorf("salt shaker cannot be nil") - } if (c.MetricsEnabled || c.MetricsRegistry != nil) && c.MetricsUpdateInterval == 0 { return fmt.Errorf("metrics update interval must be at least 1 if metrics are enabled") } diff --git a/sei-db/db_engine/litt/littdb_config_test.go b/sei-db/db_engine/litt/littdb_config_test.go new file mode 100644 index 0000000000..3a0c4994b0 --- /dev/null +++ b/sei-db/db_engine/litt/littdb_config_test.go @@ -0,0 +1,37 @@ +//go:build littdb_wip + +package litt + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSanityCheckShardingFactorBounds(t *testing.T) { + t.Parallel() + + t.Run("zero is rejected", func(t *testing.T) { + t.Parallel() + config, err := DefaultConfig("/tmp/litt-test") + require.NoError(t, err) + config.ShardingFactor = 0 + require.Error(t, config.SanityCheck()) + }) + + t.Run("MaxShardingFactor is accepted", func(t *testing.T) { + t.Parallel() + config, err := DefaultConfig("/tmp/litt-test") + require.NoError(t, err) 
+ config.ShardingFactor = MaxShardingFactor + require.NoError(t, config.SanityCheck()) + }) + + t.Run("MaxShardingFactor + 1 is rejected", func(t *testing.T) { + t.Parallel() + config, err := DefaultConfig("/tmp/litt-test") + require.NoError(t, err) + config.ShardingFactor = MaxShardingFactor + 1 + require.Error(t, config.SanityCheck()) + }) +} diff --git a/sei-db/db_engine/litt/test/table_test.go b/sei-db/db_engine/litt/test/table_test.go index 2db5402831..f157385a31 100644 --- a/sei-db/db_engine/litt/test/table_test.go +++ b/sei-db/db_engine/litt/test/table_test.go @@ -138,7 +138,6 @@ func buildMemKeyDiskTable( config.Clock = clock config.Fsync = false config.DoubleWriteProtection = true - config.SaltShaker = util.NewTestRandom().Rand config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.Logger = logger @@ -185,7 +184,6 @@ func buildPebbleDBKeyDiskTable( config.Clock = clock config.Fsync = false config.DoubleWriteProtection = true - config.SaltShaker = util.NewTestRandom().Rand config.TargetSegmentFileSize = 100 // intentionally use a very small segment size config.Logger = logger diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/000002.log b/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/000002.log deleted file mode 100644 index 1b7abab4dc..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/000002.log and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-0.values deleted file mode 100644 index 0e2633c55c..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-1.values deleted file mode 100644 index dddbbf6344..0000000000 Binary files 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-2.values deleted file mode 100644 index 44c3cdd182..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0-3.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.keys deleted file mode 100644 index 65cab9c89b..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.metadata deleted file mode 100644 index 8a8e7ddf33..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/0.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-0.values deleted file mode 100644 index 12e110e188..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-1.values deleted file mode 100644 index a8d54a9950..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-2.values deleted file mode 100644 index 1fcbda4d49..0000000000 Binary files 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1-3.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.keys deleted file mode 100644 index c8ac100441..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.metadata deleted file mode 100644 index d6b0009259..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/1.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-0.values deleted file mode 100644 index a1fa22180a..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-1.values deleted file mode 100644 index 98fbe59a9e..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-2.values deleted file mode 100644 index ca0e85a503..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2-3.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.keys deleted file mode 100644 index ed5b06dfb1..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.metadata deleted file mode 100644 index 20b0a61628..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/2.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-0.values deleted file mode 100644 index 6f52f3e679..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-1.values deleted file mode 100644 index c2c0d3509c..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-2.values deleted file mode 100644 index 91d59f96e6..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-3.values deleted file mode 100644 index fd7c5e0699..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3-3.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.keys deleted file mode 100644 index b3c346a148..0000000000 Binary files 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.metadata deleted file mode 100644 index 03e611b9de..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/3.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-0.values deleted file mode 100644 index 10d25de74e..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-1.values deleted file mode 100644 index 18bedaf41a..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-2.values deleted file mode 100644 index 60f42b9e61..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-3.values deleted file mode 100644 index 1eab4a8104..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4-3.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.keys deleted file mode 100644 index 8433e98cab..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.metadata 
deleted file mode 100644 index 7971d6f761..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/4.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-0.values deleted file mode 100644 index 82b669d005..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-1.values deleted file mode 100644 index afc7333b1c..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-2.values deleted file mode 100644 index dc8b704212..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-3.values deleted file mode 100644 index 7b0731a3ee..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5-3.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.keys deleted file mode 100644 index 12b77bafcc..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.metadata deleted file mode 100644 index 0c178d951e..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/5.metadata and /dev/null differ diff --git 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-0.values deleted file mode 100644 index a35d908795..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-0.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-1.values deleted file mode 100644 index 6b940ec063..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-1.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-2.values deleted file mode 100644 index 3573e68e11..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-2.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-3.values deleted file mode 100644 index 38d2b0b736..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6-3.values and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.keys deleted file mode 100644 index 3c853516c6..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.keys and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.metadata deleted file mode 100644 index b69c1e738d..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/6.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-0.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-0.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git 
a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-1.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-1.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-2.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-2.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-3.values b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7-3.values deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7.keys b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7.keys deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7.metadata b/sei-db/db_engine/litt/test/testdata/v2/test/segments/7.metadata deleted file mode 100644 index a962c20776..0000000000 Binary files a/sei-db/db_engine/litt/test/testdata/v2/test/segments/7.metadata and /dev/null differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/000002.log b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/000002.log new file mode 100644 index 0000000000..123ad6e771 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/000002.log differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/LOCK b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/LOCK similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/LOCK rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/LOCK diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/MANIFEST-000001 b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/MANIFEST-000001 similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/MANIFEST-000001 rename to 
sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/MANIFEST-000001 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/OPTIONS-000003 b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/OPTIONS-000003 similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/OPTIONS-000003 rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/OPTIONS-000003 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/marker.format-version.000001.013 b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/marker.format-version.000001.013 similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/marker.format-version.000001.013 rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/marker.format-version.000001.013 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/marker.manifest.000001.MANIFEST-000001 b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/marker.manifest.000001.MANIFEST-000001 similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/data/marker.manifest.000001.MANIFEST-000001 rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/data/marker.manifest.000001.MANIFEST-000001 diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/initialized b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/initialized similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/initialized rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/initialized diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/keymap/keymap-type.txt b/sei-db/db_engine/litt/test/testdata/v3/test/keymap/keymap-type.txt similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/keymap/keymap-type.txt rename to sei-db/db_engine/litt/test/testdata/v3/test/keymap/keymap-type.txt diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-0.values 
b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-0.values new file mode 100644 index 0000000000..331a14864d Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-0.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-1.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-1.values new file mode 100644 index 0000000000..37ad8c80a9 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-1.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-2.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-2.values new file mode 100644 index 0000000000..e0dcd18ce5 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-2.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-3.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-3.values new file mode 100644 index 0000000000..403726f7e6 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0-3.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.keys b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.keys new file mode 100644 index 0000000000..1b20c9cb4e Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.keys differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.metadata new file mode 100644 index 0000000000..59f5cc7e39 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/0.metadata differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-0.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-0.values new file mode 100644 index 0000000000..5021c4a0e1 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-0.values differ diff --git 
a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-1.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-1.values new file mode 100644 index 0000000000..2c2322eec8 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-1.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-2.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-2.values new file mode 100644 index 0000000000..a3a0a1cc52 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-2.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-3.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-3.values new file mode 100644 index 0000000000..d6e12ce43a Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1-3.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.keys b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.keys new file mode 100644 index 0000000000..e4de555f72 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.keys differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.metadata new file mode 100644 index 0000000000..ceb0472df2 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/1.metadata differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-0.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-0.values new file mode 100644 index 0000000000..a6213d16ef Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-0.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-1.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-1.values new file mode 100644 index 0000000000..9acdd2b3b0 Binary files /dev/null and 
b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-1.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-2.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-2.values new file mode 100644 index 0000000000..26f273a2a2 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-2.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-3.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-3.values new file mode 100644 index 0000000000..fc8433606c Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2-3.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.keys b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.keys new file mode 100644 index 0000000000..b075b9adaf Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.keys differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.metadata new file mode 100644 index 0000000000..c6c01a5bb1 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/2.metadata differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-0.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-0.values new file mode 100644 index 0000000000..736194f339 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-0.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-1.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-1.values new file mode 100644 index 0000000000..8ba67c346b Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-1.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-2.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-2.values new file mode 100644 index 
0000000000..64f9fb5824 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-2.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-3.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-3.values new file mode 100644 index 0000000000..e3e316f029 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3-3.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.keys b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.keys new file mode 100644 index 0000000000..521be25139 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.keys differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.metadata new file mode 100644 index 0000000000..731f63f0c8 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/3.metadata differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-0.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-0.values new file mode 100644 index 0000000000..372cfe7001 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-0.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-1.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-1.values new file mode 100644 index 0000000000..8234bd8132 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-1.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-2.values b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-2.values new file mode 100644 index 0000000000..be061c6a2d Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-2.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-3.values 
b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-3.values new file mode 100644 index 0000000000..33525506ca Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4-3.values differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.keys b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.keys new file mode 100644 index 0000000000..5d0f0a0fa8 Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.keys differ diff --git a/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.metadata new file mode 100644 index 0000000000..03885bbf3f Binary files /dev/null and b/sei-db/db_engine/litt/test/testdata/v3/test/segments/4.metadata differ diff --git a/sei-db/db_engine/litt/test/testdata/v2/test/table.metadata b/sei-db/db_engine/litt/test/testdata/v3/test/table.metadata similarity index 100% rename from sei-db/db_engine/litt/test/testdata/v2/test/table.metadata rename to sei-db/db_engine/litt/test/testdata/v3/test/table.metadata diff --git a/sei-db/db_engine/litt/types/address.go b/sei-db/db_engine/litt/types/address.go index dc3a6dfcb3..b9fe6ed1f8 100644 --- a/sei-db/db_engine/litt/types/address.go +++ b/sei-db/db_engine/litt/types/address.go @@ -7,41 +7,83 @@ import ( "fmt" ) -// Address describes the location of data on disk. -// The first 4 bytes are the file ID, and the second 4 bytes are the offset of the data within the file. -type Address uint64 +// AddressSerializedSize is the on-disk size of a serialized Address in bytes. +// Layout: index(4) | offset(4) | shardID(1) | valueSize(4) +const AddressSerializedSize = 13 -// NewAddress creates a new address -func NewAddress(index uint32, offset uint32) Address { - return Address(uint64(index)<<32 | uint64(offset)) +// Address describes the location of a value on disk. 
+// +// An Address identifies the file the value lives in (Index), the byte offset of the value's length prefix +// within that file (Offset), the shard within the segment that owns the value (ShardID), and the size of +// the value itself in bytes (ValueSize). +type Address struct { + // index is the segment index that owns the value. Combined with the shardID, it identifies the value file + // that contains the value's bytes. + index uint32 + // offset is the byte position of the value's length prefix within the shard's value file. The value's + // bytes immediately follow the 4-byte length prefix. + offset uint32 + // shardID is the index of the shard within the segment that holds the value. Encoded as a single byte, + // which caps the maximum sharding factor at 256. + shardID uint8 + // valueSize is the length of the value in bytes (not counting the 4-byte length prefix on disk). + valueSize uint32 } -// DeserializeAddress converts a byte slice to an address. +// NewAddress creates a new Address. +func NewAddress(index uint32, offset uint32, shardID uint8, valueSize uint32) Address { + return Address{ + index: index, + offset: offset, + shardID: shardID, + valueSize: valueSize, + } +} + +// DeserializeAddress converts a byte slice to an Address. The slice must be exactly AddressSerializedSize bytes. func DeserializeAddress(bytes []byte) (Address, error) { - if len(bytes) != 8 { - return 0, fmt.Errorf("invalid address length: %d", len(bytes)) + if len(bytes) != AddressSerializedSize { + return Address{}, fmt.Errorf("invalid address length: %d", len(bytes)) } - return Address(binary.BigEndian.Uint64(bytes)), nil + return Address{ + index: binary.BigEndian.Uint32(bytes[0:4]), + offset: binary.BigEndian.Uint32(bytes[4:8]), + shardID: bytes[8], + valueSize: binary.BigEndian.Uint32(bytes[9:13]), + }, nil } -// Index returns the file index of the value address. +// Index returns the segment index of the value. 
func (a Address) Index() uint32 { - return uint32(a >> 32) + return a.index } -// Offset returns the offset of the value address. +// Offset returns the byte offset of the value within its shard's value file. func (a Address) Offset() uint32 { - return uint32(a) + return a.offset +} + +// ShardID returns the shard within the segment that owns the value. +func (a Address) ShardID() uint8 { + return a.shardID +} + +// ValueSize returns the size of the value in bytes. +func (a Address) ValueSize() uint32 { + return a.valueSize } // String returns a string representation of the address. func (a Address) String() string { - return fmt.Sprintf("(%d:%d)", a.Index(), a.Offset()) + return fmt.Sprintf("(%d:%d@%d, %d)", a.index, a.offset, a.shardID, a.valueSize) } -// Serialize converts the address to a byte slice. +// Serialize converts the address to a byte slice of length AddressSerializedSize. func (a Address) Serialize() []byte { - bytes := make([]byte, 8) - binary.BigEndian.PutUint64(bytes, uint64(a)) + bytes := make([]byte, AddressSerializedSize) + binary.BigEndian.PutUint32(bytes[0:4], a.index) + binary.BigEndian.PutUint32(bytes[4:8], a.offset) + bytes[8] = a.shardID + binary.BigEndian.PutUint32(bytes[9:13], a.valueSize) return bytes } diff --git a/sei-db/db_engine/litt/types/scoped_key.go b/sei-db/db_engine/litt/types/scoped_key.go index 418e8aaab7..0ccb471a61 100644 --- a/sei-db/db_engine/litt/types/scoped_key.go +++ b/sei-db/db_engine/litt/types/scoped_key.go @@ -2,12 +2,11 @@ package types -// ScopedKey is a key, plus additional information about the value associated with the key. +// ScopedKey is a key paired with the Address that locates its value on disk. +// The value's size and the owning shard are both stored within the Address itself. type ScopedKey struct { // A key in the DB. Key []byte // The location where the value associated with the key is stored. Address Address - // The length of the value associated with the key. 
- ValueSize uint32 } diff --git a/sei-db/db_engine/litt/util/hashing.go b/sei-db/db_engine/litt/util/hashing.go deleted file mode 100644 index dfd210d59e..0000000000 --- a/sei-db/db_engine/litt/util/hashing.go +++ /dev/null @@ -1,74 +0,0 @@ -//go:build littdb_wip - -package util - -import ( - "encoding/binary" - - "github.com/dchest/siphash" -) - -// Perm64 computes A permutation (invertible function) on 64 bits. -// The constants were found by automated search, to -// optimize avalanche. Avalanche means that for a -// random number x, flipping bit i of x has about a -// 50 percent chance of flipping bit j of perm64(x). -// For each possible pair (i,j), this function achieves -// a probability between 49.8 and 50.2 percent. -// -// Warning: this is not a cryptographic hash function. This hash function may be suitable for hash tables, but not for -// cryptographic purposes. It is trivially easy to reverse this function. -// -// Algorithm borrowed from https://github.com/hiero-ledger/hiero-consensus-node/blob/main/platform-sdk/swirlds-common/src/main/java/com/swirlds/common/utility/NonCryptographicHashing.java -// (original implementation is under Apache 2.0 license, algorithm designed by Leemon Baird) -func Perm64(x uint64) uint64 { - // This is necessary so that 0 does not hash to 0. - // As a side effect this constant will hash to 0. - x ^= 0x5e8a016a5eb99c18 - - x += x << 30 - x ^= x >> 27 - x += x << 16 - x ^= x >> 20 - x += x << 5 - x ^= x >> 18 - x += x << 10 - x ^= x >> 24 - x += x << 30 - return x -} - -// Perm64Bytes hashes a byte slice using perm64. 
-func Perm64Bytes(b []byte) uint64 { - x := uint64(0) - - for i := 0; i < len(b); i += 8 { - var next uint64 - if i+8 <= len(b) { - // grab the next 8 bytes - next = binary.BigEndian.Uint64(b[i:]) - } else { - // insufficient bytes, pad with zeros - nextBytes := make([]byte, 8) - copy(nextBytes, b[i:]) - next = binary.BigEndian.Uint64(nextBytes) - } - x = Perm64(next ^ x) - } - - return x -} - -// LegacyHashKey hash a key using the original littDB hash function. Once all data stored using the original -// hash function is deleted, this function can be removed. -func LegacyHashKey(key []byte, salt uint32) uint32 { - return uint32(Perm64(Perm64Bytes(key) ^ uint64(salt))) -} - -// HashKey hashes a key using perm64 and a salt. -func HashKey(key []byte, salt [16]byte) uint32 { - leftSalt := binary.BigEndian.Uint64(salt[:8]) - rightSalt := binary.BigEndian.Uint64(salt[8:]) - hash := siphash.Hash(leftSalt, rightSalt, key) - return uint32(hash) -}