From 5938fbf305cfd64423c4bf333faf8f294a8b2542 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 00:20:00 +0300 Subject: [PATCH 01/15] Add skeleton for the search index functionality --- scdb/internal/buffers/buffer.go | 14 +- scdb/internal/buffers/buffer_test.go | 6 +- scdb/internal/buffers/pool.go | 77 +++-- scdb/internal/buffers/pool_test.go | 271 ++++++++++++---- .../entries/{ => headers}/db_file_header.go | 2 +- .../{ => headers}/db_file_header_test.go | 2 +- .../entries/{ => values}/key_value.go | 2 +- .../entries/{ => values}/key_value_test.go | 2 +- scdb/internal/inverted_index.go | 56 ++++ scdb/internal/inverted_index_test.go | 1 + scdb/store.go | 99 +++++- scdb/store_test.go | 288 +++++++++++++++++- 12 files changed, 699 insertions(+), 121 deletions(-) rename scdb/internal/entries/{ => headers}/db_file_header.go (99%) rename scdb/internal/entries/{ => headers}/db_file_header_test.go (99%) rename scdb/internal/entries/{ => values}/key_value.go (99%) rename scdb/internal/entries/{ => values}/key_value_test.go (99%) create mode 100644 scdb/internal/inverted_index.go create mode 100644 scdb/internal/inverted_index_test.go diff --git a/scdb/internal/buffers/buffer.go b/scdb/internal/buffers/buffer.go index 0de78d6..2619205 100644 --- a/scdb/internal/buffers/buffer.go +++ b/scdb/internal/buffers/buffer.go @@ -3,7 +3,7 @@ package buffers import ( "bytes" "github.com/sopherapps/go-scdb/scdb/internal" - "github.com/sopherapps/go-scdb/scdb/internal/entries" + "github.com/sopherapps/go-scdb/scdb/internal/entries/values" "math" ) @@ -76,9 +76,9 @@ func (b *Buffer) Replace(addr uint64, data []byte) error { // GetValue returns the *entries.KeyValueEntry at the given address if the key there corresponds to the given key // Otherwise, it returns nil. This is to handle hash collisions. 
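+// (Two different keys can hash to the same index slot, so the entry's stored key is
+// compared with the requested key before the value is trusted.)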
-func (b *Buffer) GetValue(addr uint64, key []byte) (*entries.KeyValueEntry, error) { +func (b *Buffer) GetValue(addr uint64, key []byte) (*values.KeyValueEntry, error) { offset := addr - b.LeftOffset - entry, err := entries.ExtractKeyValueEntryFromByteArray(b.Data, offset) + entry, err := values.ExtractKeyValueEntryFromByteArray(b.Data, offset) if err != nil { return nil, err } @@ -103,12 +103,12 @@ func (b *Buffer) ReadAt(addr uint64, size uint64) ([]byte, error) { // AddrBelongsToKey checks to see if the given address is for the given key func (b *Buffer) AddrBelongsToKey(addr uint64, key []byte) (bool, error) { keySize := uint64(len(key)) - err := internal.ValidateBounds(addr, addr+keySize+entries.OffsetForKeyInKVArray, b.LeftOffset, b.RightOffset, "address out of bounds") + err := internal.ValidateBounds(addr, addr+keySize+values.OffsetForKeyInKVArray, b.LeftOffset, b.RightOffset, "address out of bounds") if err != nil { return false, err } - lw := addr - b.LeftOffset + entries.OffsetForKeyInKVArray + lw := addr - b.LeftOffset + values.OffsetForKeyInKVArray keyInData := b.Data[lw : lw+keySize] return bytes.Equal(keyInData, key), nil } @@ -117,12 +117,12 @@ func (b *Buffer) AddrBelongsToKey(addr uint64, key []byte) (bool, error) { // It returns false if the kv entry at the given address is not for the given key func (b *Buffer) TryDeleteKvEntry(addr uint64, key []byte) (bool, error) { keySize := uint64(len(key)) - err := internal.ValidateBounds(addr, addr+keySize+entries.OffsetForKeyInKVArray, b.LeftOffset, b.RightOffset, "address out of bounds") + err := internal.ValidateBounds(addr, addr+keySize+values.OffsetForKeyInKVArray, b.LeftOffset, b.RightOffset, "address out of bounds") if err != nil { return false, err } - keyOffset := addr - b.LeftOffset + entries.OffsetForKeyInKVArray + keyOffset := addr - b.LeftOffset + values.OffsetForKeyInKVArray keyInData := b.Data[keyOffset : keyOffset+keySize] if bytes.Equal(keyInData, key) { diff --git a/scdb/internal/buffers/buffer_test.go b/scdb/internal/buffers/buffer_test.go index 3f675b8..eb1eab7 100644 --- a/scdb/internal/buffers/buffer_test.go +++ b/scdb/internal/buffers/buffer_test.go @@ -1,7 +1,7 @@ package buffers import ( - "github.com/sopherapps/go-scdb/scdb/internal/entries" + "github.com/sopherapps/go-scdb/scdb/internal/entries/values" "github.com/stretchr/testify/assert" "testing" ) @@ -114,11 +114,11 @@ func TestBuffer_GetValue(t *testing.T) { type testRecord struct { addr uint64 key []byte - expected *entries.KeyValueEntry + expected *values.KeyValueEntry } buf := NewBuffer(79, KvDataArray, CAPACITY) - kv := entries.NewKeyValueEntry([]byte("foo"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("foo"), []byte("bar"), 0) testData := []testRecord{ {79, []byte("foo"), kv}, {79, []byte("bar"), nil}, diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index 7286e6d..c17d3e4 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -3,8 +3,10 @@ package buffers import ( "bytes" "errors" + "fmt" "github.com/sopherapps/go-scdb/scdb/internal" - "github.com/sopherapps/go-scdb/scdb/internal/entries" + "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" + "github.com/sopherapps/go-scdb/scdb/internal/entries/values" "io" "math" "os" @@ -13,6 +15,18 @@ import ( const DefaultPoolCapacity uint64 = 5 +// KeyValuePair is a pair of key and value +// +// It is especially useful when searching +type KeyValuePair struct { + K []byte + V []byte +} + +func (kv KeyValuePair) String() string { 
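+	// render the pair as `key: value`; this is the format that appears in ExampleStore_Search's output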
+ return fmt.Sprintf("%s: %s", kv.K, kv.V) +} + // BufferPool is a pool of key-value and index Buffer's. // // It is possible to have more than one buffer with the same address in a kind of overlap @@ -65,15 +79,15 @@ func NewBufferPool(capacity *uint64, filePath string, maxKeys *uint64, redundant return nil, err } - var header *entries.DbFileHeader + var header *headers.DbFileHeader if !dbFileExists { - header = entries.NewDbFileHeader(maxKeys, redundantBlocks, &bufSize) + header = headers.NewDbFileHeader(maxKeys, redundantBlocks, &bufSize) _, err = initializeDbFile(file, header) if err != nil { return nil, err } } else { - header, err = entries.ExtractDbFileHeaderFromFile(file) + header, err = headers.ExtractDbFileHeaderFromFile(file) if err != nil { return nil, err } @@ -152,12 +166,12 @@ func (bp *BufferPool) Append(data []byte) (uint64, error) { // or (addr + data length) is greater than or equal BufferPool.keyValuesStartPoint func (bp *BufferPool) UpdateIndex(addr uint64, data []byte) error { dataLength := uint64(len(data)) - err := internal.ValidateBounds(addr, addr+dataLength, entries.HeaderSizeInBytes, bp.keyValuesStartPoint, "data is outside the index bounds") + err := internal.ValidateBounds(addr, addr+dataLength, headers.HeaderSizeInBytes, bp.keyValuesStartPoint, "data is outside the index bounds") if err != nil { return err } - blockLeftOffset := bp.getBlockLeftOffset(addr, entries.HeaderSizeInBytes) + blockLeftOffset := bp.getBlockLeftOffset(addr, headers.HeaderSizeInBytes) buf, ok := bp.indexBuffers[blockLeftOffset] if ok { err = buf.Replace(addr, data) @@ -173,7 +187,7 @@ func (bp *BufferPool) UpdateIndex(addr uint64, data []byte) error { // ClearFile clears all data on disk and memory making it like a new store func (bp *BufferPool) ClearFile() error { bufSize := uint32(bp.bufferSize) - header := entries.NewDbFileHeader(&bp.maxKeys, &bp.redundantBlocks, &bufSize) + header := headers.NewDbFileHeader(&bp.maxKeys, &bp.redundantBlocks, &bufSize) fileSize, err := initializeDbFile(bp.File, header) if err != nil { return err @@ -186,7 +200,7 @@ func (bp *BufferPool) ClearFile() error { // CompactFile removes any deleted or expired entries from the file. It must first lock the buffer and the file. 
// In order to be more efficient, it creates a new file, copying only that data which is not deleted or expired -func (bp *BufferPool) CompactFile() error { +func (bp *BufferPool) CompactFile(searchIndex *internal.InvertedIndex) error { folder := filepath.Dir(bp.FilePath) newFilePath := filepath.Join(folder, "tmp__compact.scdb") newFile, err := os.OpenFile(newFilePath, os.O_RDWR|os.O_CREATE, 0666) @@ -194,7 +208,7 @@ func (bp *BufferPool) CompactFile() error { return err } - header, err := entries.ExtractDbFileHeaderFromFile(bp.File) + header, err := headers.ExtractDbFileHeaderFromFile(bp.File) if err != nil { return err } @@ -205,11 +219,11 @@ func (bp *BufferPool) CompactFile() error { return err } - idxEntrySize := entries.IndexEntrySizeInBytes + idxEntrySize := headers.IndexEntrySizeInBytes idxEntrySizeAsInt64 := int64(idxEntrySize) zero := make([]byte, idxEntrySize) zeroStr := string(zero) - idxOffset := int64(entries.HeaderSizeInBytes) + idxOffset := int64(headers.HeaderSizeInBytes) newFileOffset := int64(header.KeyValuesStartPoint) numOfBlocks := int64(header.NumberOfIndexBlocks) @@ -238,7 +252,7 @@ func (bp *BufferPool) CompactFile() error { return e } - kv, e := entries.ExtractKeyValueEntryFromByteArray(kvByteArray, 0) + kv, e := values.ExtractKeyValueEntryFromByteArray(kvByteArray, 0) if e != nil { return e } @@ -252,10 +266,18 @@ func (bp *BufferPool) CompactFile() error { } // update index to have the index of the newly added key-value entry - _, er = newFile.WriteAt(internal.Uint64ToByteArray(uint64(newFileOffset)), idxOffset) + newKvAddr := uint64(newFileOffset) + _, er = newFile.WriteAt(internal.Uint64ToByteArray(newKvAddr), idxOffset) if er != nil && !errors.Is(er, io.EOF) { return er } + + // update search index + err = searchIndex.Add(kv.Key, newKvAddr, kv.Expiry) + if err != nil { + return err + } + // increment the new file offset newFileOffset += kvSize } else { @@ -291,7 +313,7 @@ func (bp *BufferPool) CompactFile() error { // GetValue returns the *entries.KeyValueEntry at the given address if the key there corresponds to the given key // Otherwise, it returns nil. This is to handle hash collisions. -func (bp *BufferPool) GetValue(kvAddress uint64, key []byte) (*entries.KeyValueEntry, error) { +func (bp *BufferPool) GetValue(kvAddress uint64, key []byte) (*values.KeyValueEntry, error) { if kvAddress == 0 { return nil, nil } @@ -319,7 +341,7 @@ func (bp *BufferPool) GetValue(kvAddress uint64, key []byte) (*entries.KeyValueE // update kv_buffers only upto actual data read (cater for partially filled buffer) bp.kvBuffers = append(bp.kvBuffers, NewBuffer(kvAddress, buf[:bytesRead], bp.bufferSize)) - entry, err := entries.ExtractKeyValueEntryFromByteArray(buf, 0) + entry, err := values.ExtractKeyValueEntryFromByteArray(buf, 0) if err != nil { return nil, err } @@ -335,7 +357,7 @@ func (bp *BufferPool) GetValue(kvAddress uint64, key []byte) (*entries.KeyValueE // is the same as the key provided. 
It returns true if successful func (bp *BufferPool) TryDeleteKvEntry(kvAddress uint64, key []byte) (bool, error) { keySize := int64(len(key)) - addrForIsDeleted := int64(kvAddress+entries.OffsetForKeyInKVArray) + keySize + addrForIsDeleted := int64(kvAddress+values.OffsetForKeyInKVArray) + keySize // loop in reverse, starting at the back // since the latest kv_buffers are the ones updated when new changes occur kvBufLen := len(bp.kvBuffers) @@ -410,7 +432,7 @@ func (bp *BufferPool) AddrBelongsToKey(kvAddress uint64, key []byte) (bool, erro // update kv_buffers only upto actual data read (cater for partially filled buffer) bp.kvBuffers = append(bp.kvBuffers, NewBuffer(kvAddress, buf[:bytesRead], bp.bufferSize)) - keyInFile := buf[entries.OffsetForKeyInKVArray : entries.OffsetForKeyInKVArray+uint64(len(key))] + keyInFile := buf[values.OffsetForKeyInKVArray : values.OffsetForKeyInKVArray+uint64(len(key))] isForKey := bytes.Contains(keyInFile, key) return isForKey, nil } @@ -420,15 +442,15 @@ func (bp *BufferPool) AddrBelongsToKey(kvAddress uint64, key []byte) (bool, erro // If the address is less than [HEADER_SIZE_IN_BYTES] or [BufferPool.key_values_start_point], // an ErrOutOfBounds error is returned func (bp *BufferPool) ReadIndex(addr uint64) ([]byte, error) { - err := internal.ValidateBounds(addr, addr+entries.IndexEntrySizeInBytes, entries.HeaderSizeInBytes, bp.keyValuesStartPoint, "out of index bounds") + err := internal.ValidateBounds(addr, addr+headers.IndexEntrySizeInBytes, headers.HeaderSizeInBytes, bp.keyValuesStartPoint, "out of index bounds") if err != nil { return nil, err } - blockLeftOffset := bp.getBlockLeftOffset(addr, entries.HeaderSizeInBytes) + blockLeftOffset := bp.getBlockLeftOffset(addr, headers.HeaderSizeInBytes) buf, ok := bp.indexBuffers[blockLeftOffset] if ok { - return buf.ReadAt(addr, entries.IndexEntrySizeInBytes) + return buf.ReadAt(addr, headers.IndexEntrySizeInBytes) } data := make([]byte, bp.bufferSize) @@ -443,7 +465,7 @@ func (bp *BufferPool) ReadIndex(addr uint64) ([]byte, error) { if uint64(len(bp.indexBuffers)) >= bp.indexCapacity { biggestLeftOffset := uint64(0) - for lftOffset, _ := range bp.indexBuffers { + for lftOffset := range bp.indexBuffers { if lftOffset >= biggestLeftOffset { biggestLeftOffset = lftOffset } @@ -458,13 +480,18 @@ func (bp *BufferPool) ReadIndex(addr uint64) ([]byte, error) { } start := addr - blockLeftOffset - return data[start : start+entries.IndexEntrySizeInBytes], nil + return data[start : start+headers.IndexEntrySizeInBytes], nil +} + +// GetManyKeyValues gets all the key-value pairs that correspond to the given list of key-value addresses +func (bp *BufferPool) GetManyKeyValues(addrs []uint64) ([]KeyValuePair, error) { + return nil, nil } // readIndexBlock returns the next index block func (bp *BufferPool) readIndexBlock(blockNum int64, blockSize int64) ([]byte, error) { buf := make([]byte, blockSize) - offset := int64(entries.HeaderSizeInBytes) + (blockNum * blockSize) + offset := int64(headers.HeaderSizeInBytes) + (blockNum * blockSize) bytesRead, err := bp.File.ReadAt(buf, offset) if err != nil && !errors.Is(err, io.EOF) { @@ -540,7 +567,7 @@ func getIndexCapacity(numOfIndexBlocks uint64, totalCapacity uint64) uint64 { // initializeDbFile initializes the database file, giving it the header and the index place holders // and truncating it. 
It returns the new file size -func initializeDbFile(file *os.File, header *entries.DbFileHeader) (int64, error) { +func initializeDbFile(file *os.File, header *headers.DbFileHeader) (int64, error) { headerBytes := header.AsBytes() headerLength := int64(len(headerBytes)) finalSize := headerLength + int64(header.NumberOfIndexBlocks*header.NetBlockSize) @@ -567,7 +594,7 @@ func initializeDbFile(file *os.File, header *entries.DbFileHeader) (int64, error // extractKeyAsByteArrayFromFile extracts the byte array for the key from a given file func extractKeyAsByteArrayFromFile(file *os.File, kvAddr uint64, keySize int64) ([]byte, error) { - offset := int64(kvAddr + entries.OffsetForKeyInKVArray) + offset := int64(kvAddr + values.OffsetForKeyInKVArray) buf := make([]byte, keySize) _, err := file.ReadAt(buf, offset) if err != nil { diff --git a/scdb/internal/buffers/pool_test.go b/scdb/internal/buffers/pool_test.go index cd9f3dc..c1c4681 100644 --- a/scdb/internal/buffers/pool_test.go +++ b/scdb/internal/buffers/pool_test.go @@ -3,7 +3,8 @@ package buffers import ( "bytes" "github.com/sopherapps/go-scdb/scdb/internal" - "github.com/sopherapps/go-scdb/scdb/internal/entries" + "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" + "github.com/sopherapps/go-scdb/scdb/internal/entries/values" "github.com/stretchr/testify/assert" "os" "testing" @@ -40,45 +41,45 @@ func TestNewBufferPool(t *testing.T) { testData := []testRecord{ {nil, fileName, nil, nil, nil, expectedRecord{ bufferSize: uint64(os.Getpagesize()), - maxKeys: entries.DefaultMaxKeys, - redundantBlocks: entries.DefaultRedundantBlocks, + maxKeys: headers.DefaultMaxKeys, + redundantBlocks: headers.DefaultRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, }}, {&testCapacity, fileName, nil, nil, nil, expectedRecord{ bufferSize: uint64(os.Getpagesize()), - maxKeys: entries.DefaultMaxKeys, - redundantBlocks: entries.DefaultRedundantBlocks, + maxKeys: headers.DefaultMaxKeys, + redundantBlocks: headers.DefaultRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, }}, {nil, fileName, nil, nil, nil, expectedRecord{ bufferSize: uint64(os.Getpagesize()), - maxKeys: entries.DefaultMaxKeys, - redundantBlocks: entries.DefaultRedundantBlocks, + maxKeys: headers.DefaultMaxKeys, + redundantBlocks: headers.DefaultRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(nil, nil, nil).KeyValuesStartPoint, }}, {nil, fileName, &testMaxKeys, nil, nil, expectedRecord{ bufferSize: uint64(os.Getpagesize()), maxKeys: testMaxKeys, - redundantBlocks: entries.DefaultRedundantBlocks, + redundantBlocks: headers.DefaultRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(&testMaxKeys, nil, nil).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(&testMaxKeys, nil, nil).KeyValuesStartPoint, }}, {nil, fileName, nil, &testRedundantBlocks, nil, expectedRecord{ bufferSize: uint64(os.Getpagesize()), - maxKeys: entries.DefaultMaxKeys, + maxKeys: headers.DefaultMaxKeys, redundantBlocks: testRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(nil, &testRedundantBlocks, nil).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(nil, &testRedundantBlocks, nil).KeyValuesStartPoint, }}, {nil, 
fileName, nil, nil, &testBufferSize, expectedRecord{ bufferSize: uint64(testBufferSize), - maxKeys: entries.DefaultMaxKeys, - redundantBlocks: entries.DefaultRedundantBlocks, + maxKeys: headers.DefaultMaxKeys, + redundantBlocks: headers.DefaultRedundantBlocks, filePath: fileName, - fileSize: entries.NewDbFileHeader(nil, nil, &testBufferSize).KeyValuesStartPoint, + fileSize: headers.NewDbFileHeader(nil, nil, &testBufferSize).KeyValuesStartPoint, }}, } @@ -266,7 +267,7 @@ func TestBufferPool_UpdateIndex(t *testing.T) { if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - offset := entries.HeaderSizeInBytes + 5 + offset := headers.HeaderSizeInBytes + 5 initialFileSize := pool.FileSize writeToFile(t, fileName, int64(offset), data) @@ -303,7 +304,7 @@ func TestBufferPool_UpdateIndex(t *testing.T) { t.Fatalf("error creating new buffer pool: %s", err) } - initialOffset := entries.HeaderSizeInBytes + initialOffset := headers.HeaderSizeInBytes initialFileSize := pool.FileSize writeToFile(t, fileName, int64(initialOffset), initialData) @@ -346,11 +347,11 @@ func TestBufferPool_UpdateIndex(t *testing.T) { t.Fatalf("error creating new buffer pool: %s", err) } - appendIndexBuffer(pool, entries.HeaderSizeInBytes+2, initialData) + appendIndexBuffer(pool, headers.HeaderSizeInBytes+2, initialData) addresses := []uint64{ pool.keyValuesStartPoint + 3, pool.keyValuesStartPoint + 50, - entries.HeaderSizeInBytes - 6, + headers.HeaderSizeInBytes - 6, } for _, addr := range addresses { @@ -392,13 +393,13 @@ func TestBufferPool_ClearFile(t *testing.T) { appendKvBuffer(pool, initialOffset, initialData) appendKvBuffer(pool, 0, headerArray) - headerPreClear, err := entries.ExtractDbFileHeaderFromFile(pool.File) + headerPreClear, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting header from pool file: %s", err) } - kv1 := entries.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) - kv2 := entries.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), uint64(time.Now().Unix()*2)) + kv1 := values.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) + kv2 := values.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), uint64(time.Now().Unix()*2)) insertKeyValueEntry(t, pool, headerPreClear, kv1) insertKeyValueEntry(t, pool, headerPreClear, kv2) @@ -409,7 +410,7 @@ func TestBufferPool_ClearFile(t *testing.T) { } finalFileSize := getActualFileSize(t, fileName) - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting header from pool file: %s", err) } @@ -435,6 +436,7 @@ func TestBufferPool_ClearFile(t *testing.T) { func TestBufferPool_CompactFile(t *testing.T) { fileName := "testdb_pool.scdb" + indexFileName := "testdb_pool.iscdb" defer func() { _ = os.Remove(fileName) }() @@ -443,11 +445,11 @@ func TestBufferPool_CompactFile(t *testing.T) { _ = os.Remove(fileName) futureTimestamp := uint64(time.Now().Unix() * 2) - neverExpires := entries.NewKeyValueEntry([]byte("never_expires"), []byte("bar"), 0) - deleted := entries.NewKeyValueEntry([]byte("deleted"), []byte("bok"), 0) + neverExpires := values.NewKeyValueEntry([]byte("never_expires"), []byte("bar"), 0) + deleted := values.NewKeyValueEntry([]byte("deleted"), []byte("bok"), 0) // 1666023836u64 is some past timestamp in October 2022 - expired := entries.NewKeyValueEntry([]byte("expired"), []byte("bar"), 1666023836) - notExpired := entries.NewKeyValueEntry([]byte("not_expired"), []byte("bar"), 
futureTimestamp) + expired := values.NewKeyValueEntry([]byte("expired"), []byte("bar"), 1666023836) + notExpired := values.NewKeyValueEntry([]byte("not_expired"), []byte("bar"), futureTimestamp) // Limit the max_keys to 10 otherwise the memory will be consumed when we try to get all data in file maxKeys := uint64(10) @@ -458,7 +460,7 @@ func TestBufferPool_CompactFile(t *testing.T) { appendKvBuffer(pool, 0, []byte{76, 79}) - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting header from file: %s", err) } @@ -474,7 +476,12 @@ func TestBufferPool_CompactFile(t *testing.T) { initialFileSize := getActualFileSize(t, fileName) - err = pool.CompactFile() + searchIndex, err := internal.NewInvertedIndex(indexFileName, nil, nil, nil) + if err != nil { + t.Fatalf("error creating a search index: %s", err) + } + + err = pool.CompactFile(searchIndex) if err != nil { t.Fatalf("error compacting db file: %s", err) } @@ -508,13 +515,13 @@ func TestBufferPool_GetValue(t *testing.T) { }() t.Run("BufferPool_GetValueForNonExistingBufferGetsValueFromFileDirectly", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -536,13 +543,13 @@ func TestBufferPool_GetValue(t *testing.T) { }) t.Run("BufferPool_GetValueFromExistingBufferGetsValueFromBuffer", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -571,13 +578,13 @@ func TestBufferPool_GetValue(t *testing.T) { t.Run("BufferPool_GetValueForExpiredValueReturnsNil", func(t *testing.T) { // 1666023836u64 is some past timestamp in October 2022 so this is expired - kv := entries.NewKeyValueEntry([]byte("expires"), []byte("bar"), 1666023836) + kv := values.NewKeyValueEntry([]byte("expires"), []byte("bar"), 1666023836) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -599,13 +606,13 @@ func TestBufferPool_GetValue(t *testing.T) { }) t.Run("BufferPool_GetValueForDeletedValueReturnsNil", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("deleted"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("deleted"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { 
t.Fatalf("error extracting db file header from file: %s", err) } @@ -629,6 +636,150 @@ func TestBufferPool_GetValue(t *testing.T) { }) } +func TestBufferPool_GetManyKeyValues(t *testing.T) { + fileName := "testdb_pool.scdb" + + t.Run("GetManyKeyValuesReturnsTheKeyValuesForTheGivenAddresses", func(t *testing.T) { + defer func() { + _ = os.Remove(fileName) + }() + + table := []KeyValuePair{ + {[]byte("kv"), []byte("bar")}, + {[]byte("hey"), []byte("man")}, + {[]byte("holla"), []byte("pension")}, + {[]byte("putty"), []byte("6788")}, + {[]byte("ninety-nine"), []byte("millenium")}, + } + + pool, err := NewBufferPool(nil, fileName, nil, nil, nil) + if err != nil { + t.Fatalf("error creating the buffer pool: %s", err) + } + + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) + if err != nil { + t.Fatalf("error creating the db file header: %s", err) + } + + addresses := make([]uint64, len(table)) + for _, pair := range table { + kv := values.NewKeyValueEntry(pair.K, pair.V, 0) + insertKeyValueEntry(t, pool, header, kv) + kvAddr := getKvAddress(t, pool, header, kv) + addresses = append(addresses, kvAddr) + } + + got, err := pool.GetManyKeyValues(addresses) + if err != nil { + t.Fatalf("error getting many key values: %s", err) + } + + assert.Equal(t, table, got) + }) + + t.Run("GetManyKeyValuesNeverReturnsExpiredKeyValues", func(t *testing.T) { + defer func() { + _ = os.Remove(fileName) + }() + + nonExpired := []KeyValuePair{ + {[]byte("kv"), []byte("bar")}, + {[]byte("hey"), []byte("man")}} + + expired := []KeyValuePair{ + {[]byte("holla"), []byte("pension")}, + {[]byte("putty"), []byte("6788")}, + {[]byte("ninety-nine"), []byte("millenium")}, + } + + pool, err := NewBufferPool(nil, fileName, nil, nil, nil) + if err != nil { + t.Fatalf("error creating the buffer pool: %s", err) + } + + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) + if err != nil { + t.Fatalf("error creating the db file header: %s", err) + } + + addresses := make([]uint64, len(nonExpired)+len(expired)) + for _, pair := range nonExpired { + kv := values.NewKeyValueEntry(pair.K, pair.V, 0) + insertKeyValueEntry(t, pool, header, kv) + kvAddr := getKvAddress(t, pool, header, kv) + addresses = append(addresses, kvAddr) + } + + for _, pair := range expired { + // 1666023836 is some past timestamp in October 2022 so this is expired + kv := values.NewKeyValueEntry(pair.K, pair.V, uint64(1666023836)) + insertKeyValueEntry(t, pool, header, kv) + kvAddr := getKvAddress(t, pool, header, kv) + addresses = append(addresses, kvAddr) + } + + got, err := pool.GetManyKeyValues(addresses) + if err != nil { + t.Fatalf("error getting many key values: %s", err) + } + + assert.Equal(t, nonExpired, got) + }) + + t.Run("GetManyKeyValuesNeverReturnsDeletedKeyValues", func(t *testing.T) { + defer func() { + _ = os.Remove(fileName) + }() + + deleted := []KeyValuePair{ + {[]byte("kv"), []byte("bar")}, + {[]byte("hey"), []byte("man")}} + + unDeleted := []KeyValuePair{ + {[]byte("holla"), []byte("pension")}, + {[]byte("putty"), []byte("6788")}, + {[]byte("ninety-nine"), []byte("millenium")}, + } + + pool, err := NewBufferPool(nil, fileName, nil, nil, nil) + if err != nil { + t.Fatalf("error creating the buffer pool: %s", err) + } + + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) + if err != nil { + t.Fatalf("error creating the db file header: %s", err) + } + + addresses := make([]uint64, len(unDeleted)+len(deleted)) + for _, pair := range unDeleted { + kv := values.NewKeyValueEntry(pair.K, pair.V, 0) + 
insertKeyValueEntry(t, pool, header, kv) + kvAddr := getKvAddress(t, pool, header, kv) + addresses = append(addresses, kvAddr) + } + + for _, pair := range deleted { + kv := values.NewKeyValueEntry(pair.K, pair.V, 0) + insertKeyValueEntry(t, pool, header, kv) + kvAddr := getKvAddress(t, pool, header, kv) + addresses = append(addresses, kvAddr) + _, err = pool.TryDeleteKvEntry(kvAddr, kv.Key) + if err != nil { + t.Fatalf("error try deleting kv entry: %s", err) + } + } + + got, err := pool.GetManyKeyValues(addresses) + if err != nil { + t.Fatalf("error getting many key values: %s", err) + } + + assert.Equal(t, unDeleted, got) + }) +} + func TestBufferPool_AddrBelongsToKey(t *testing.T) { fileName := "testdb_pool.scdb" defer func() { @@ -636,14 +787,14 @@ func TestBufferPool_AddrBelongsToKey(t *testing.T) { }() t.Run("BufferPool_AddrBelongsToKeyChecksIfKeyValueEntryAtGivenAddressHasGivenKey", func(t *testing.T) { - kv1 := entries.NewKeyValueEntry([]byte("never"), []byte("bar"), 0) - kv2 := entries.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), 0) + kv1 := values.NewKeyValueEntry([]byte("never"), []byte("bar"), 0) + kv2 := values.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -684,13 +835,13 @@ func TestBufferPool_AddrBelongsToKey(t *testing.T) { t.Run("BufferPool_AddrBelongsToKeyForAnExpiredKeyReturnsTrue", func(t *testing.T) { // 1666023836 is some past timestamp in October 2022 so this is expired - kv := entries.NewKeyValueEntry([]byte("expires"), []byte("bar"), 1666023836) + kv := values.NewKeyValueEntry([]byte("expires"), []byte("bar"), 1666023836) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -712,13 +863,13 @@ func TestBufferPool_AddrBelongsToKey(t *testing.T) { }) t.Run("BufferPool_AddrBelongsToKeyForOutOfBoundsAddressReturnsFalse", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("foo"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("foo"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -745,14 +896,14 @@ func TestBufferPool_TryDeleteKvEntry(t *testing.T) { _ = os.Remove(fileName) }() - kv1 := entries.NewKeyValueEntry([]byte("never"), []byte("bar"), 0) - kv2 := entries.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), 0) + kv1 := values.NewKeyValueEntry([]byte("never"), []byte("bar"), 0) + kv2 := values.NewKeyValueEntry([]byte("foo"), []byte("baracuda"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { 
t.Fatalf("error extracting db file header from file: %s", err) } @@ -837,13 +988,13 @@ func TestBufferPool_ReadIndex(t *testing.T) { }() t.Run("BufferPool_ReadIndexReadsIndexAtGivenAddressIfAddressIsWithinTheIndexBands", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -868,13 +1019,13 @@ func TestBufferPool_ReadIndex(t *testing.T) { }) t.Run("BufferPool_ReadIndexReturnsErrorIfGivenAddressIsOutsideTheIndexBands", func(t *testing.T) { - kv := entries.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) + kv := values.NewKeyValueEntry([]byte("kv"), []byte("bar"), 0) pool, err := NewBufferPool(nil, fileName, nil, nil, nil) if err != nil { t.Fatalf("error creating new buffer pool: %s", err) } - header, err := entries.ExtractDbFileHeaderFromFile(pool.File) + header, err := headers.ExtractDbFileHeaderFromFile(pool.File) if err != nil { t.Fatalf("error extracting db file header from file: %s", err) } @@ -882,7 +1033,7 @@ func TestBufferPool_ReadIndex(t *testing.T) { insertKeyValueEntry(t, pool, header, kv) kvAddr := getKvAddress(t, pool, header, kv) - valueAddr := kvAddr + uint64(entries.KeyValueMinSizeInBytes) + uint64(kv.KeySize) + valueAddr := kvAddr + uint64(values.KeyValueMinSizeInBytes) + uint64(kv.KeySize) fileSize := getActualFileSize(t, fileName) testData := []uint64{kvAddr, valueAddr, fileSize} @@ -957,7 +1108,7 @@ func writeToFile(t *testing.T, filePath string, offset int64, data []byte) { } // insertKeyValueEntry inserts a key value entry into the pool, updating the index also -func insertKeyValueEntry(t *testing.T, pool *BufferPool, header *entries.DbFileHeader, kv *entries.KeyValueEntry) { +func insertKeyValueEntry(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) { idxAddr := header.GetIndexOffset(kv.Key) kvAddr, err := pool.Append(kv.AsBytes()) if err != nil { @@ -971,8 +1122,8 @@ func insertKeyValueEntry(t *testing.T, pool *BufferPool, header *entries.DbFileH } // getKvAddress returns the address for the given key value entry within the buffer pool -func getKvAddress(t *testing.T, pool *BufferPool, header *entries.DbFileHeader, kv *entries.KeyValueEntry) uint64 { - kvAddr := make([]byte, entries.IndexEntrySizeInBytes) +func getKvAddress(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) uint64 { + kvAddr := make([]byte, headers.IndexEntrySizeInBytes) indexAddr := int64(header.GetIndexOffset(kv.Key)) _, err := pool.File.ReadAt(kvAddr, indexAddr) @@ -989,7 +1140,7 @@ func getKvAddress(t *testing.T, pool *BufferPool, header *entries.DbFileHeader, } // deleteKeyValue deletes a given key value in the given pool -func deleteKeyValue(t *testing.T, pool *BufferPool, header *entries.DbFileHeader, kv *entries.KeyValueEntry) { +func deleteKeyValue(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) { indexAddr := header.GetIndexOffset(kv.Key) err := pool.UpdateIndex(indexAddr, internal.Uint64ToByteArray(0)) if err != nil { @@ -998,8 +1149,8 @@ func deleteKeyValue(t *testing.T, pool *BufferPool, header *entries.DbFileHeader } // keyValueExists checks 
whether a given key value entry exists in the data array read from the file
-func keyValueExists(t *testing.T, data []byte, header *entries.DbFileHeader, kv *entries.KeyValueEntry) bool {
-	idxItemSize := entries.IndexEntrySizeInBytes
+func keyValueExists(t *testing.T, data []byte, header *headers.DbFileHeader, kv *values.KeyValueEntry) bool {
+	idxItemSize := headers.IndexEntrySizeInBytes
 	idxAddr := header.GetIndexOffset(kv.Key)
 	kvAddrByteArray := data[idxAddr : idxAddr+idxItemSize]
 	zero := make([]byte, idxItemSize)
diff --git a/scdb/internal/entries/db_file_header.go b/scdb/internal/entries/headers/db_file_header.go
similarity index 99%
rename from scdb/internal/entries/db_file_header.go
rename to scdb/internal/entries/headers/db_file_header.go
index 0025bb7..e4a9acb 100644
--- a/scdb/internal/entries/db_file_header.go
+++ b/scdb/internal/entries/headers/db_file_header.go
@@ -1,4 +1,4 @@
-package entries
+package headers
 
 import (
 	"fmt"
diff --git a/scdb/internal/entries/db_file_header_test.go b/scdb/internal/entries/headers/db_file_header_test.go
similarity index 99%
rename from scdb/internal/entries/db_file_header_test.go
rename to scdb/internal/entries/headers/db_file_header_test.go
index 837e277..15d039a 100644
--- a/scdb/internal/entries/db_file_header_test.go
+++ b/scdb/internal/entries/headers/db_file_header_test.go
@@ -1,4 +1,4 @@
-package entries
+package headers
 
 import (
 	"fmt"
diff --git a/scdb/internal/entries/key_value.go b/scdb/internal/entries/values/key_value.go
similarity index 99%
rename from scdb/internal/entries/key_value.go
rename to scdb/internal/entries/values/key_value.go
index 0a75b63..6cacd1f 100644
--- a/scdb/internal/entries/key_value.go
+++ b/scdb/internal/entries/values/key_value.go
@@ -1,4 +1,4 @@
-package entries
+package values
 
 import (
 	"github.com/sopherapps/go-scdb/scdb/internal"
diff --git a/scdb/internal/entries/key_value_test.go b/scdb/internal/entries/values/key_value_test.go
similarity index 99%
rename from scdb/internal/entries/key_value_test.go
rename to scdb/internal/entries/values/key_value_test.go
index 080fab9..23f4653 100644
--- a/scdb/internal/entries/key_value_test.go
+++ b/scdb/internal/entries/values/key_value_test.go
@@ -1,4 +1,4 @@
-package entries
+package values
 
 import (
 	"fmt"
diff --git a/scdb/internal/inverted_index.go b/scdb/internal/inverted_index.go
new file mode 100644
index 0000000..28b499f
--- /dev/null
+++ b/scdb/internal/inverted_index.go
@@ -0,0 +1,56 @@
+package internal
+
+import "os"
+
+// InvertedIndex is the search index that maps prefixes of db keys to the addresses
+// of their key-value entries in the db file
+type InvertedIndex struct {
+	File *os.File
+}
+
+// NewInvertedIndex initializes a new Inverted Index
+//
+// The maximum number of keys in the search file is `max_index_key_len` * `db_max_keys`,
+// since each db key is represented in the index up to `max_index_key_len` times;
+// for example, the key `food` produces the index keys `f`, `fo`, `foo`, `food`.
+func NewInvertedIndex(filePath string, maxIndexKeyLen *uint32, dbMaxKeys *uint64, dbRedundantBlocks *uint16) (*InvertedIndex, error) {
+	return nil, nil
+}
+
+// Add adds a key's kv address to the list kept for each of the key's prefixes,
+// updating the inverted index
+func (idx *InvertedIndex) Add(key []byte, kvAddr uint64, expiry uint64) error {
+	return nil
+}
+
+// Search returns the list of db key-value addresses corresponding to the given term
+//
+// The addresses can then be used to get the list of key-values from the db.
+//
+// It skips the first `skip` number of results and returns not more than
+// `limit` number of items.
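For example, skip=1 with limit=2 over matched addresses [a1, a2, a3, a4] yields [a2, a3].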
This is to avoid using up more memory than can be handled by the
+// host machine.
+//
+// If `limit` is 0, all items are returned, since it would make no sense to search
+// for zero items.
+func (idx *InvertedIndex) Search(term []byte, skip uint64, limit uint64) ([]uint64, error) {
+	return nil, nil
+}
+
+// Remove deletes the key's kv address from all prefixes' lists in the inverted index
+func (idx *InvertedIndex) Remove(key []byte) error {
+	return nil
+}
+
+// Clear clears all the data in the search index, leaving only the header and its
+// original settings
+func (idx *InvertedIndex) Clear() error {
+	return nil
+}
+
+// Eq checks if the other InvertedIndex instance equals the current inverted index
+func (idx *InvertedIndex) Eq(other *InvertedIndex) bool {
+	return false
+}
+
+// Close closes the inverted index, freeing up any resources it holds
+func (idx *InvertedIndex) Close() error {
+	return idx.File.Close()
+}
diff --git a/scdb/internal/inverted_index_test.go b/scdb/internal/inverted_index_test.go
new file mode 100644
index 0000000..5bf0569
--- /dev/null
+++ b/scdb/internal/inverted_index_test.go
@@ -0,0 +1 @@
+package internal
diff --git a/scdb/store.go b/scdb/store.go
index 1248ee7..15cf213 100644
--- a/scdb/store.go
+++ b/scdb/store.go
@@ -5,7 +5,8 @@ import (
 	"github.com/sopherapps/go-scdb/scdb/errors"
 	"github.com/sopherapps/go-scdb/scdb/internal"
 	"github.com/sopherapps/go-scdb/scdb/internal/buffers"
-	"github.com/sopherapps/go-scdb/scdb/internal/entries"
+	"github.com/sopherapps/go-scdb/scdb/internal/entries/headers"
+	"github.com/sopherapps/go-scdb/scdb/internal/entries/values"
 	"os"
 	"path/filepath"
 	"sync"
@@ -15,6 +16,9 @@ import (
 // defaultDbFile is the default name of the database file that contains all the key-value pairs
 const defaultDbFile string = "dump.scdb"
 
+// defaultSearchIndexFile is the default name of the inverted index file used for full-text search
+const defaultSearchIndexFile string = "index.iscdb"
+
 var zeroU64 = internal.Uint64ToByteArray(0)
 
 // Store is a key-value store that persists key-value pairs to disk
 //
 // It allows for specifying how long each key-value pair should be
 // kept for, i.e. the time-to-live in seconds. If none is provided, they last indefinitely.
 type Store struct {
-	bufferPool *buffers.BufferPool
-	header     *entries.DbFileHeader
-	closeCh    chan bool
-	mu         sync.Mutex
-	isClosed   bool
+	bufferPool  *buffers.BufferPool
+	header      *headers.DbFileHeader
+	searchIndex *internal.InvertedIndex
+	closeCh     chan bool
+	mu          sync.Mutex
+	isClosed    bool
 }
 
 // New creates a new Store at the given path
 // A new key-value pair is created and the old one is left unindexed.
 // Compaction is important because it reclaims this space and reduces the size
 // of the database file.
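+//
+// `maxIndexKeyLen` limits how long each indexed key prefix can be in the search
+// index that backs Store.Search; see NewInvertedIndex for how prefixes are derived.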
-func New(path string, maxKeys *uint64, redundantBlocks *uint16, poolCapacity *uint64, compactionInterval *uint32) (*Store, error) {
+func New(path string, maxKeys *uint64, redundantBlocks *uint16, poolCapacity *uint64, compactionInterval *uint32, maxIndexKeyLen *uint32) (*Store, error) {
 	err := os.MkdirAll(path, 0755)
 	if err != nil {
 		return nil, err
 	}
@@ -77,7 +82,13 @@
 		return nil, err
 	}
 
-	header, err := entries.ExtractDbFileHeaderFromFile(bufferPool.File)
+	header, err := headers.ExtractDbFileHeaderFromFile(bufferPool.File)
+	if err != nil {
+		return nil, err
+	}
+
+	searchIndexFilePath := filepath.Join(path, defaultSearchIndexFile)
+	searchIndex, err := internal.NewInvertedIndex(searchIndexFilePath, maxIndexKeyLen, maxKeys, redundantBlocks)
 	if err != nil {
 		return nil, err
 	}
@@ -88,9 +99,10 @@
 	}
 
 	store := &Store{
-		bufferPool: bufferPool,
-		header:     header,
-		closeCh:    make(chan bool),
+		bufferPool:  bufferPool,
+		header:      header,
+		searchIndex: searchIndex,
+		closeCh:     make(chan bool),
 	}
 
 	go store.startBackgroundCompaction(interval)
@@ -139,12 +151,19 @@ func (s *Store) Set(k []byte, v []byte, ttl *uint64) error {
 	}
 
 	if isOffsetForKey {
-		kv := entries.NewKeyValueEntry(k, v, expiry)
+		kv := values.NewKeyValueEntry(k, v, expiry)
 		prevLastOffset, err := s.bufferPool.Append(kv.AsBytes())
 		if err != nil {
 			return err
 		}
-		return s.bufferPool.UpdateIndex(indexOffset, internal.Uint64ToByteArray(prevLastOffset))
+
+		err = s.bufferPool.UpdateIndex(indexOffset, internal.Uint64ToByteArray(prevLastOffset))
+		if err != nil {
+			return err
+		}
+
+		// Update the search index
+		return s.searchIndex.Add(k, prevLastOffset, expiry)
 	}
 }
 
@@ -192,11 +211,39 @@ func (s *Store) Get(k []byte) ([]byte, error) {
 	return nil, nil
 }
 
+// Search searches for unexpired keys that start with the given search term
+//
+// It skips the first `skip` (default: 0) number of results and returns not more than
+// `limit` (default: 0) number of items. This is to avoid using up more memory than can be handled by the
+// host machine.
+//
+// If `limit` is 0, all items are returned, since it would make no sense to search
+// for zero items.
+//
+// It returns a list of key-value pairs, i.e. `buffers.KeyValuePair`
+func (s *Store) Search(term []byte, skip uint64, limit uint64) ([]buffers.KeyValuePair, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	addrs, err := s.searchIndex.Search(term, skip, limit)
+	if err != nil {
+		return nil, err
+	}
+
+	return s.bufferPool.GetManyKeyValues(addrs)
+}
+
 // Delete removes the key-value for the given key
 func (s *Store) Delete(k []byte) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
+	done := make(chan error, 1) // buffered so the goroutine below never blocks if we return early
+	// Update the search index in a separate goroutine.
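+	// (The inverted index lives in its own file, so its update can overlap the
+	// index-block scan below.)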
+	go func(ch chan<- error) {
+		ch <- s.searchIndex.Remove(k)
+	}(done)
+
 	initialIdxOffset := s.header.GetIndexOffset(k)
 
 	for idxBlock := uint64(0); idxBlock < s.header.NumberOfIndexBlocks; idxBlock++ {
@@ -229,8 +276,10 @@ func (s *Store) Delete(k []byte) error {
 		}
 		// else continue looping
 	}
+	// if it is not found, no error is thrown
 
-	return nil // if it is not found, no error is thrown
+	err := <-done // wait for search index to complete
+	return err
 }
 
 // Clear removes all data in the store
@@ -238,7 +287,19 @@ func (s *Store) Clear() error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	return s.bufferPool.ClearFile()
+	done := make(chan error, 1) // buffered so the goroutine below never blocks if we return early
+	// Update the search index in a separate goroutine.
+	go func(ch chan<- error) {
+		ch <- s.searchIndex.Clear()
+	}(done)
+
+	err := s.bufferPool.ClearFile()
+	if err != nil {
+		return err
+	}
+
+	err = <-done // wait for search index goroutine to end
+	return err
 }
 
 // Compact manually removes dangling key-value pairs in the database file
@@ -260,7 +321,7 @@
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	return s.bufferPool.CompactFile()
+	return s.bufferPool.CompactFile(s.searchIndex)
 }
 
 // Close frees up any resources occupied by store.
@@ -283,7 +344,13 @@
 		return err
 	}
 
+	err = s.searchIndex.Close()
+	if err != nil {
+		return err
+	}
+
 	s.header = nil
+	s.searchIndex = nil
 
 	return nil
 }
diff --git a/scdb/store_test.go b/scdb/store_test.go
index be24b71..d89e9d2 100644
--- a/scdb/store_test.go
+++ b/scdb/store_test.go
@@ -2,6 +2,7 @@ package scdb
 
 import (
 	"fmt"
+	"github.com/sopherapps/go-scdb/scdb/internal/buffers"
 	"github.com/stretchr/testify/assert"
 	"log"
 	"os"
@@ -26,6 +27,30 @@ var RECORDS = []testRecord{
 	{[]byte("mulimuta"), []byte("Runyoro")},
 }
 
+var SEARCH_RECORDS = []testRecord{
+	{[]byte("foo"), []byte("eng")},
+	{[]byte("fore"), []byte("span")},
+	{[]byte("food"), []byte("lug")},
+	{[]byte("bar"), []byte("port")},
+	{[]byte("band"), []byte("nyoro")},
+	{[]byte("pig"), []byte("dan")},
+}
+
+var SEARCH_TERMS = [][]byte{
+	[]byte("f"),
+	[]byte("fo"),
+	[]byte("foo"),
+	[]byte("for"),
+	[]byte("b"),
+	[]byte("ba"),
+	[]byte("bar"),
+	[]byte("ban"),
+	[]byte("pigg"),
+	[]byte("p"),
+	[]byte("pi"),
+	[]byte("pig"),
+}
+
 func TestStore_Get(t *testing.T) {
 	dbPath := "testdb_get"
 	removeStore(t, dbPath)
@@ -51,6 +76,167 @@ func TestStore_Get(t *testing.T) {
 	})
 }
 
+func TestStore_Search(t *testing.T) {
+	dbPath := "testdb_search"
+	removeStore(t, dbPath)
+	store := createStore(t, dbPath, nil)
+	defer func() {
+		_ = store.Close()
+		removeStore(t, dbPath)
+	}()
+
+	type testParams struct {
+		term     []byte
+		skip     uint64
+		limit    uint64
+		expected []buffers.KeyValuePair
+	}
+
+	t.Run("SearchWithoutPaginationReturnsAllMatchedKeyValues", func(t *testing.T) {
+		table := []testParams{
+			{[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}},
+			{[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}},
+			{[]byte("foo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("food"), []byte("lug")}}},
+			{[]byte("food"), 0, 0, []buffers.KeyValuePair{{[]byte("food"), []byte("lug")}}},
+			{[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}},
+			{[]byte("b"), 0, 0, []buffers.KeyValuePair{{[]byte("bar"), []byte("port")}, {[]byte("band"), []byte("nyoro")}}},
+			{[]byte("ba"), 0, 0,
[]buffers.KeyValuePair{{[]byte("bar"), []byte("port")}, {[]byte("band"), []byte("nyoro")}}}, + {[]byte("bar"), 0, 0, []buffers.KeyValuePair{{[]byte("bar"), []byte("port")}}}, + {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("band"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bare"), 0, 0, []buffers.KeyValuePair{}}, + } + + insertRecords(t, store, SEARCH_RECORDS, nil) + for _, rec := range table { + got, err := store.Search(rec.term, rec.skip, rec.limit) + if err != nil { + t.Fatalf("error searching: %s", err) + } + + assert.Equal(t, rec.expected, got) + } + }) + + t.Run("SearchWithPaginationSkipsSomeAndReturnsNotMoreThanLimit", func(t *testing.T) { + table := []testParams{ + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 0, 8, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 1, 8, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 1, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 0, 2, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}}}, + {[]byte("fo"), 1, 2, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 0, 1, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}}}, + {[]byte("fo"), 2, 1, []buffers.KeyValuePair{{[]byte("food"), []byte("lug")}}}, + {[]byte("fo"), 1, 1, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + } + + insertRecords(t, store, SEARCH_RECORDS, nil) + for _, rec := range table { + got, err := store.Search(rec.term, rec.skip, rec.limit) + if err != nil { + t.Fatalf("error searching: %s", err) + } + + assert.Equal(t, rec.expected, got) + } + }) + + t.Run("SearchAfterExpirationReturnsNoExpiredKeysValues", func(t *testing.T) { + table := []testParams{ + {[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("foo"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("b"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("ba"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("bar"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("band"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("food"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, + 
{[]byte("bare"), 0, 0, []buffers.KeyValuePair{}}, + } + recordsToExpire := []testRecord{RECORDS[0], RECORDS[2], RECORDS[3]} + ttl := uint64(1) + insertRecords(t, store, SEARCH_RECORDS, nil) + insertRecords(t, store, recordsToExpire, &ttl) + + // wait for some items to expire + time.Sleep(2 * time.Second) + for _, rec := range table { + got, err := store.Search(rec.term, rec.skip, rec.limit) + if err != nil { + t.Fatalf("error searching: %s", err) + } + + assert.Equal(t, rec.expected, got) + } + }) + + t.Run("SearchAfterDeleteReturnsNoDeletedKeyValues", func(t *testing.T) { + table := []testParams{ + {[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("foo"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("b"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("ba"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bar"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("ban"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("band"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("food"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, + {[]byte("bare"), 0, 0, []buffers.KeyValuePair{}}, + } + keysToDelete := [][]byte{[]byte("foo"), []byte("food"), []byte("bar"), []byte("band")} + + insertRecords(t, store, SEARCH_RECORDS, nil) + deleteRecords(t, store, keysToDelete) + + //for (term, expected) in test_data: + for _, rec := range table { + got, err := store.Search(rec.term, rec.skip, rec.limit) + if err != nil { + t.Fatalf("error searching: %s", err) + } + + assert.Equal(t, rec.expected, got) + } + }) + + t.Run("SearchAfterClearReturnsAnEmptyList", func(t *testing.T) { + insertRecords(t, store, SEARCH_RECORDS, nil) + err := store.Clear() + if err != nil { + t.Fatalf("error clearing: %s", err) + } + + for _, term := range SEARCH_TERMS { + got, err := store.Search(term, 0, 0) + if err != nil { + t.Fatalf("error searching: %s", err) + } + + assert.Equal(t, []buffers.KeyValuePair{}, got) + } + }) + +} + func TestStore_Set(t *testing.T) { dbPath := "testdb_set" removeStore(t, dbPath) @@ -481,6 +667,48 @@ func BenchmarkStore_GetWithTtl(b *testing.B) { } } +func BenchmarkStore_SearchWithoutPagination(b *testing.B) { + dbPath := "testdb_search" + defer removeStoreForBenchmarks(b, dbPath) + + // Create new store + store := createStoreForBenchmarks(b, dbPath, nil) + defer func() { + _ = store.Close() + }() + + insertRecordsForBenchmarks(b, store, SEARCH_RECORDS, nil) + + for _, term := range SEARCH_TERMS { + b.Run(fmt.Sprintf("Search (no pagination) %s", term), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _ = store.Search(term, 0, 0) + } + }) + } +} + +func BenchmarkStore_SearchWithPagination(b *testing.B) { + dbPath := "testdb_search" + defer removeStoreForBenchmarks(b, dbPath) + + // Create new store + store := createStoreForBenchmarks(b, dbPath, nil) + defer func() { + _ = store.Close() + }() + + insertRecordsForBenchmarks(b, store, SEARCH_RECORDS, nil) + + for _, term := range SEARCH_TERMS { + b.Run(fmt.Sprintf("Search (paginated) %s", term), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, 
_ = store.Search(term, 1, 2) + } + }) + } +} + func BenchmarkStore_SetWithoutTtl(b *testing.B) { dbPath := "testdb_set" defer removeStoreForBenchmarks(b, dbPath) @@ -527,13 +755,15 @@ func ExampleNew() { var redundantBlocks uint16 = 1 var poolCapacity uint64 = 10 var compactionInterval uint32 = 1_800 + var maxIndexKeyLen uint32 = 3 store, err := New( "testdb", &maxKeys, &redundantBlocks, &poolCapacity, - &compactionInterval) + &compactionInterval, + &maxIndexKeyLen) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -543,7 +773,7 @@ func ExampleNew() { } func ExampleStore_Set() { - store, err := New("testdb", nil, nil, nil, nil) + store, err := New("testdb", nil, nil, nil, nil, nil) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -564,7 +794,7 @@ func ExampleStore_Set() { } func ExampleStore_Get() { - store, err := New("testdb", nil, nil, nil, nil) + store, err := New("testdb", nil, nil, nil, nil, nil) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -585,8 +815,54 @@ func ExampleStore_Get() { fmt.Printf("%s", value) // Output: bar } + +func ExampleStore_Search() { + store, err := New("testdb", nil, nil, nil, nil, nil) + if err != nil { + log.Fatalf("error opening store: %s", err) + } + defer func() { + _ = store.Close() + }() + + data := []buffers.KeyValuePair{ + {K: []byte("hi"), V: []byte("ooliyo")}, + {K: []byte("high"), V: []byte("haiguru")}, + {K: []byte("hind"), V: []byte("enyuma")}, + {K: []byte("hill"), V: []byte("akasozi")}, + {K: []byte("him"), V: []byte("ogwo")}, + } + + for _, rec := range data { + err = store.Set(rec.K, rec.V, nil) + if err != nil { + log.Fatalf("error setting key value: %s", err) + } + } + + // without pagination + kvs, err := store.Search([]byte("hi"), 0, 0) + if err != nil { + log.Fatalf("error searching 'hi': %s", err) + } + + fmt.Printf("\nno pagination: %v", kvs) + + // with pagination: get last three + kvs, err = store.Search([]byte("hi"), 2, 3) + if err != nil { + log.Fatalf("error searching (paginated) 'hi': %s", err) + } + + fmt.Printf("\nskip 2, limit 3: %v", kvs) + + // Output: + // no pagination: [hi: ooliyo high: haiguru hind: enyuma hill: akasozi him: ogwo] + // skip 2, limit 3: [hind: enyuma hill: akasozi him: ogwo] +} + func ExampleStore_Delete() { - store, err := New("testdb", nil, nil, nil, nil) + store, err := New("testdb", nil, nil, nil, nil, nil) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -618,7 +894,7 @@ func removeStoreForBenchmarks(b *testing.B, path string) { // createStore is a utility to create a store at the given path func createStore(t *testing.T, path string, compactionInterval *uint32) *Store { - store, err := New(path, nil, nil, nil, compactionInterval) + store, err := New(path, nil, nil, nil, compactionInterval, nil) if err != nil { t.Fatalf("error opening store: %s", err) } @@ -627,7 +903,7 @@ func createStore(t *testing.T, path string, compactionInterval *uint32) *Store { // createStoreForBenchmarks is a utility to create a store at the given path func createStoreForBenchmarks(b *testing.B, path string, compactionInterval *uint32) *Store { - store, err := New(path, nil, nil, nil, compactionInterval) + store, err := New(path, nil, nil, nil, compactionInterval, nil) if err != nil { b.Fatalf("error opening store: %s", err) } From f83a06923a95a6838bb4a9267bd7f0f477778c92 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 03:11:48 +0300 Subject: [PATCH 02/15] Add inverted index header --- scdb/internal/buffers/pool.go | 84 ++- 
scdb/internal/buffers/pool_test.go | 10 +- .../entries/headers/db_file_header.go | 79 +-- .../entries/headers/db_file_header_test.go | 8 +- .../entries/headers/inverted_index_header.go | 156 +++++ .../headers/inverted_index_header_test.go | 546 ++++++++++++++++++ scdb/internal/entries/headers/shared.go | 125 ++++ scdb/store.go | 12 +- 8 files changed, 936 insertions(+), 84 deletions(-) create mode 100644 scdb/internal/entries/headers/inverted_index_header.go create mode 100644 scdb/internal/entries/headers/inverted_index_header_test.go create mode 100644 scdb/internal/entries/headers/shared.go diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index c17d3e4..4b5320b 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -82,7 +82,7 @@ func NewBufferPool(capacity *uint64, filePath string, maxKeys *uint64, redundant var header *headers.DbFileHeader if !dbFileExists { header = headers.NewDbFileHeader(maxKeys, redundantBlocks, &bufSize) - _, err = initializeDbFile(file, header) + _, err = headers.InitializeFile(file, header) if err != nil { return nil, err } @@ -188,7 +188,7 @@ func (bp *BufferPool) UpdateIndex(addr uint64, data []byte) error { func (bp *BufferPool) ClearFile() error { bufSize := uint32(bp.bufferSize) header := headers.NewDbFileHeader(&bp.maxKeys, &bp.redundantBlocks, &bufSize) - fileSize, err := initializeDbFile(bp.File, header) + fileSize, err := headers.InitializeFile(bp.File, header) if err != nil { return err } @@ -485,7 +485,58 @@ func (bp *BufferPool) ReadIndex(addr uint64) ([]byte, error) { // GetManyKeyValues gets all the key-value pairs that correspond to the given list of key-value addresses func (bp *BufferPool) GetManyKeyValues(addrs []uint64) ([]KeyValuePair, error) { - return nil, nil + results := make([]KeyValuePair, 0, len(addrs)) + + for _, addr := range addrs { + addr := int64(addr) + size, err := bp.readKvSize(addr) + if err != nil { + return nil, err + } + + buf, err := bp.readKvBytes(addr, size) + if err != nil { + return nil, err + } + + entry, err := values.ExtractKeyValueEntryFromByteArray(buf, 0) + if err != nil { + return nil, err + } + + if !entry.IsDeleted && entry.IsExpired() { + results = append(results, KeyValuePair{ + K: entry.Key, + V: entry.Value, + }) + } + } + + return results, nil +} + +// readKvBytes reads the key-value byte array directly from file given address and size +func (bp *BufferPool) readKvBytes(addr int64, size uint32) ([]byte, error) { + buf := make([]byte, size) + + _, err := bp.File.ReadAt(buf, addr) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + + return buf, nil +} + +// readSize reads the size of a key-value entry directly from file +func (bp *BufferPool) readKvSize(addr int64) (uint32, error) { + buf := make([]byte, 4) + + _, err := bp.File.ReadAt(buf, addr) + if err != nil && !errors.Is(err, io.EOF) { + return 0, err + } + + return internal.Uint32FromByteArray(buf) } // readIndexBlock returns the next index block @@ -565,33 +616,6 @@ func getIndexCapacity(numOfIndexBlocks uint64, totalCapacity uint64) uint64 { return uint64(math.Max(1, math.Min(idxCap, float64(numOfIndexBlocks)))) } -// initializeDbFile initializes the database file, giving it the header and the index place holders -// and truncating it. 
It returns the new file size -func initializeDbFile(file *os.File, header *headers.DbFileHeader) (int64, error) { - headerBytes := header.AsBytes() - headerLength := int64(len(headerBytes)) - finalSize := headerLength + int64(header.NumberOfIndexBlocks*header.NetBlockSize) - - // First shrink file to zero, to delete all data - err := file.Truncate(0) - if err != nil { - return 0, err - } - - // The expand the file again - err = file.Truncate(finalSize) - if err != nil { - return 0, err - } - - _, err = file.WriteAt(headerBytes, 0) - if err != nil { - return 0, err - } - - return finalSize, nil -} - // extractKeyAsByteArrayFromFile extracts the byte array for the key from a given file func extractKeyAsByteArrayFromFile(file *os.File, kvAddr uint64, keySize int64) ([]byte, error) { offset := int64(kvAddr + values.OffsetForKeyInKVArray) diff --git a/scdb/internal/buffers/pool_test.go b/scdb/internal/buffers/pool_test.go index c1c4681..451aa15 100644 --- a/scdb/internal/buffers/pool_test.go +++ b/scdb/internal/buffers/pool_test.go @@ -1001,7 +1001,7 @@ func TestBufferPool_ReadIndex(t *testing.T) { insertKeyValueEntry(t, pool, header, kv) - indexAddr := header.GetIndexOffset(kv.Key) + indexAddr := headers.GetIndexOffset(header, kv.Key) kvAddr := getKvAddress(t, pool, header, kv) got, err := pool.ReadIndex(indexAddr) @@ -1109,7 +1109,7 @@ func writeToFile(t *testing.T, filePath string, offset int64, data []byte) { // insertKeyValueEntry inserts a key value entry into the pool, updating the index also func insertKeyValueEntry(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) { - idxAddr := header.GetIndexOffset(kv.Key) + idxAddr := headers.GetIndexOffset(header, kv.Key) kvAddr, err := pool.Append(kv.AsBytes()) if err != nil { t.Fatalf("error appending kv: %s", err) @@ -1124,7 +1124,7 @@ func insertKeyValueEntry(t *testing.T, pool *BufferPool, header *headers.DbFileH // getKvAddress returns the address for the given key value entry within the buffer pool func getKvAddress(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) uint64 { kvAddr := make([]byte, headers.IndexEntrySizeInBytes) - indexAddr := int64(header.GetIndexOffset(kv.Key)) + indexAddr := int64(headers.GetIndexOffset(header, kv.Key)) _, err := pool.File.ReadAt(kvAddr, indexAddr) if err != nil { @@ -1141,7 +1141,7 @@ func getKvAddress(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, // deleteKeyValue deletes a given key value in the given pool func deleteKeyValue(t *testing.T, pool *BufferPool, header *headers.DbFileHeader, kv *values.KeyValueEntry) { - indexAddr := header.GetIndexOffset(kv.Key) + indexAddr := headers.GetIndexOffset(header, kv.Key) err := pool.UpdateIndex(indexAddr, internal.Uint64ToByteArray(0)) if err != nil { t.Fatalf("error updating index: %s", err) @@ -1151,7 +1151,7 @@ func deleteKeyValue(t *testing.T, pool *BufferPool, header *headers.DbFileHeader // keyValueExists checks whether a given key value entry exists in the data array got from the file func keyValueExists(t *testing.T, data []byte, header *headers.DbFileHeader, kv *values.KeyValueEntry) bool { idxItemSize := headers.IndexEntrySizeInBytes - idxAddr := header.GetIndexOffset(kv.Key) + idxAddr := headers.GetIndexOffset(header, kv.Key) kvAddrByteArray := data[idxAddr : idxAddr+idxItemSize] zero := make([]byte, idxItemSize) diff --git a/scdb/internal/entries/headers/db_file_header.go b/scdb/internal/entries/headers/db_file_header.go index e4a9acb..8075b51 100644 --- 
a/scdb/internal/entries/headers/db_file_header.go +++ b/scdb/internal/entries/headers/db_file_header.go @@ -4,15 +4,9 @@ import ( "fmt" "github.com/sopherapps/go-scdb/scdb/errors" "github.com/sopherapps/go-scdb/scdb/internal" - "math" "os" ) -const IndexEntrySizeInBytes uint64 = 8 -const HeaderSizeInBytes uint64 = 100 -const DefaultMaxKeys uint64 = 1_000_000 -const DefaultRedundantBlocks uint16 = 1 - type DbFileHeader struct { Title []byte BlockSize uint32 @@ -48,7 +42,7 @@ func NewDbFileHeader(maxKeys *uint64, redundantBlocks *uint16, blockSize *uint32 header.BlockSize = uint32(os.Getpagesize()) } - header.updateDerivedProps() + updateDerivedProps(&header) return &header } @@ -81,36 +75,21 @@ func ExtractDbFileHeaderFromByteArray(data []byte) (*DbFileHeader, error) { RedundantBlocks: redundantBlocks, } - header.updateDerivedProps() + updateDerivedProps(&header) return &header, nil } // ExtractDbFileHeaderFromFile extracts the header from a database file func ExtractDbFileHeaderFromFile(file *os.File) (*DbFileHeader, error) { - buf := make([]byte, HeaderSizeInBytes) - n, err := file.ReadAt(buf, 0) - if n < int(HeaderSizeInBytes) { - return nil, errors.NewErrOutOfBounds(fmt.Sprintf("header length is %d. expected %d", n, HeaderSizeInBytes)) - } - + data, err := readHeaderFile(file) if err != nil { return nil, err } - return ExtractDbFileHeaderFromByteArray(buf) + return ExtractDbFileHeaderFromByteArray(data) } -// updateDerivedProps computes the properties that depend on the user-defined/default properties and update them -// on self -func (h *DbFileHeader) updateDerivedProps() { - h.ItemsPerIndexBlock = uint64(math.Floor(float64(h.BlockSize) / float64(IndexEntrySizeInBytes))) - h.NumberOfIndexBlocks = uint64(math.Ceil(float64(h.MaxKeys)/float64(h.ItemsPerIndexBlock))) + uint64(h.RedundantBlocks) - h.NetBlockSize = h.ItemsPerIndexBlock * IndexEntrySizeInBytes - h.KeyValuesStartPoint = HeaderSizeInBytes + (h.NetBlockSize * h.NumberOfIndexBlocks) -} - -// AsBytes retrieves the byte array that represents the header. func (h *DbFileHeader) AsBytes() []byte { return internal.ConcatByteArrays( h.Title, @@ -121,20 +100,42 @@ func (h *DbFileHeader) AsBytes() []byte { ) } -// GetIndexOffset computes the offset for the given key in the first index block. -// It uses the meta data in this header -// i.e. 
number of items per block and the `IndexEntrySizeInBytes` -func (h *DbFileHeader) GetIndexOffset(key []byte) uint64 { - hash := internal.GetHash(key, h.ItemsPerIndexBlock) - return HeaderSizeInBytes + (hash * IndexEntrySizeInBytes) +func (h *DbFileHeader) GetItemsPerIndexBlock() uint64 { + return h.ItemsPerIndexBlock } -// GetIndexOffsetInNthBlock returns the index offset for the nth index block if `initialOffset` is the offset -// in the top most index block `n` starts at zero where zero is the top most index block -func (h *DbFileHeader) GetIndexOffsetInNthBlock(initialOffset uint64, n uint64) (uint64, error) { - if n >= h.NumberOfIndexBlocks { - return 0, errors.NewErrOutOfBounds(fmt.Sprintf("n %d is out of bounds", n)) - } - num := initialOffset + (h.NetBlockSize * n) - return num, nil +func (h *DbFileHeader) GetNumberOfIndexBlocks() uint64 { + return h.NumberOfIndexBlocks +} + +func (h *DbFileHeader) GetNetBlockSize() uint64 { + return h.NetBlockSize +} + +func (h *DbFileHeader) GetBlockSize() uint32 { + return h.BlockSize +} + +func (h *DbFileHeader) GetMaxKeys() uint64 { + return h.MaxKeys +} + +func (h *DbFileHeader) GetRedundantBlocks() uint16 { + return h.RedundantBlocks +} + +func (h *DbFileHeader) SetItemsPerIndexBlock(u uint64) { + h.ItemsPerIndexBlock = u +} + +func (h *DbFileHeader) SetNumberOfIndexBlocks(u uint64) { + h.NumberOfIndexBlocks = u +} + +func (h *DbFileHeader) SetNetBlockSize(u uint64) { + h.NetBlockSize = u +} + +func (h *DbFileHeader) SetValuesStartPoint(u uint64) { + h.KeyValuesStartPoint = u } diff --git a/scdb/internal/entries/headers/db_file_header_test.go b/scdb/internal/entries/headers/db_file_header_test.go index 15d039a..a4a9658 100644 --- a/scdb/internal/entries/headers/db_file_header_test.go +++ b/scdb/internal/entries/headers/db_file_header_test.go @@ -401,7 +401,7 @@ func TestDbFileHeader_AsBytes(t *testing.T) { func TestDbFileHeader_GetIndexOffset(t *testing.T) { dbHeader := NewDbFileHeader(nil, nil, nil) - offset := dbHeader.GetIndexOffset([]byte("foo")) + offset := GetIndexOffset(dbHeader, []byte("foo")) block1Start := HeaderSizeInBytes block1End := dbHeader.NetBlockSize + block1Start assert.LessOrEqual(t, block1Start, offset) @@ -410,14 +410,14 @@ func TestDbFileHeader_GetIndexOffset(t *testing.T) { func TestDbFileHeader_GetIndexOffsetInNthBlock(t *testing.T) { dbHeader := NewDbFileHeader(nil, nil, nil) - initialOffset := dbHeader.GetIndexOffset([]byte("foo")) + initialOffset := GetIndexOffset(dbHeader, []byte("foo")) numberOfBlocks := dbHeader.NumberOfIndexBlocks t.Run("GetIndexOffsetInNthBlockWorksAsExpected", func(t *testing.T) { for i := uint64(0); i < numberOfBlocks; i++ { blockStart := HeaderSizeInBytes + (i * dbHeader.NetBlockSize) blockEnd := dbHeader.NetBlockSize + blockStart - offset, err := dbHeader.GetIndexOffsetInNthBlock(initialOffset, i) + offset, err := GetIndexOffsetInNthBlock(dbHeader, initialOffset, i) if err != nil { t.Fatalf("error getting index offset in nth block: %s", err) } @@ -428,7 +428,7 @@ func TestDbFileHeader_GetIndexOffsetInNthBlock(t *testing.T) { t.Run("GetIndexOffsetReturnsErrOutOfBoundsIfNIsBeyondNumberOfIndexBlocksInHeader", func(t *testing.T) { for i := numberOfBlocks; i < numberOfBlocks+2; i++ { - _, err := dbHeader.GetIndexOffsetInNthBlock(initialOffset, i) + _, err := GetIndexOffsetInNthBlock(dbHeader, initialOffset, i) expectedError := errors.NewErrOutOfBounds(fmt.Sprintf("n %d is out of bounds", i)) assert.Equal(t, expectedError, err) } diff --git 
a/scdb/internal/entries/headers/inverted_index_header.go b/scdb/internal/entries/headers/inverted_index_header.go new file mode 100644 index 0000000..bbd3630 --- /dev/null +++ b/scdb/internal/entries/headers/inverted_index_header.go @@ -0,0 +1,156 @@ +package headers + +import ( + "fmt" + "github.com/sopherapps/go-scdb/scdb/errors" + "github.com/sopherapps/go-scdb/scdb/internal" + "os" +) + +const DefaultMaxIndexKeyLen uint32 = 3 + +type InvertedIndexHeader struct { + Title []byte + BlockSize uint32 + MaxKeys uint64 + RedundantBlocks uint16 + ItemsPerIndexBlock uint64 + NumberOfIndexBlocks uint64 + ValuesStartPoint uint64 + NetBlockSize uint64 + MaxIndexKeyLen uint32 +} + +// NewInvertedIndexHeader creates a new InvertedIndexHeader +func NewInvertedIndexHeader(maxKeys *uint64, redundantBlocks *uint16, blockSize *uint32, maxIndexKeyLen *uint32) *InvertedIndexHeader { + header := InvertedIndexHeader{ + Title: []byte("ScdbIndex v0.001"), + } + + if maxIndexKeyLen != nil { + header.MaxIndexKeyLen = *maxIndexKeyLen + } else { + header.MaxIndexKeyLen = DefaultMaxIndexKeyLen + } + + if maxKeys != nil { + header.MaxKeys = *maxKeys + } else { + header.MaxKeys = DefaultMaxKeys * uint64(header.MaxIndexKeyLen) + } + + if redundantBlocks != nil { + header.RedundantBlocks = *redundantBlocks + } else { + header.RedundantBlocks = DefaultRedundantBlocks + } + + if blockSize != nil { + header.BlockSize = *blockSize + } else { + header.BlockSize = uint32(os.Getpagesize()) + } + + updateDerivedProps(&header) + + return &header +} + +// ExtractInvertedIndexHeaderFromByteArray extracts the inverted index header from the data byte array +func ExtractInvertedIndexHeaderFromByteArray(data []byte) (*InvertedIndexHeader, error) { + dataLength := len(data) + if dataLength < int(HeaderSizeInBytes) { + return nil, errors.NewErrOutOfBounds(fmt.Sprintf("header length is %d. 
expected %d", dataLength, HeaderSizeInBytes)) + } + + title := data[:16] + blockSize, err := internal.Uint32FromByteArray(data[16:20]) + if err != nil { + return nil, err + } + maxKeys, err := internal.Uint64FromByteArray(data[20:28]) + if err != nil { + return nil, err + } + redundantBlocks, err := internal.Uint16FromByteArray(data[28:30]) + if err != nil { + return nil, err + } + maxIndexKeyLen, err := internal.Uint32FromByteArray(data[30:34]) + if err != nil { + return nil, err + } + + header := InvertedIndexHeader{ + Title: title, + BlockSize: blockSize, + MaxKeys: maxKeys, + RedundantBlocks: redundantBlocks, + MaxIndexKeyLen: maxIndexKeyLen, + } + + updateDerivedProps(&header) + + return &header, nil +} + +// ExtractInvertedIndexHeaderFromFile extracts the header from an index file +func ExtractInvertedIndexHeaderFromFile(file *os.File) (*InvertedIndexHeader, error) { + data, err := readHeaderFile(file) + if err != nil { + return nil, err + } + + return ExtractInvertedIndexHeaderFromByteArray(data) +} + +func (h *InvertedIndexHeader) AsBytes() []byte { + return internal.ConcatByteArrays( + h.Title, + internal.Uint32ToByteArray(h.BlockSize), + internal.Uint64ToByteArray(h.MaxKeys), + internal.Uint16ToByteArray(h.RedundantBlocks), + internal.Uint32ToByteArray(h.MaxIndexKeyLen), + make([]byte, 66), + ) +} + +func (h *InvertedIndexHeader) GetItemsPerIndexBlock() uint64 { + return h.ItemsPerIndexBlock +} + +func (h *InvertedIndexHeader) GetNumberOfIndexBlocks() uint64 { + return h.NumberOfIndexBlocks +} + +func (h *InvertedIndexHeader) GetNetBlockSize() uint64 { + return h.NetBlockSize +} + +func (h *InvertedIndexHeader) GetBlockSize() uint32 { + return h.BlockSize +} + +func (h *InvertedIndexHeader) GetMaxKeys() uint64 { + return h.MaxKeys +} + +func (h *InvertedIndexHeader) GetRedundantBlocks() uint16 { + return h.RedundantBlocks +} + +func (h *InvertedIndexHeader) SetItemsPerIndexBlock(u uint64) { + h.ItemsPerIndexBlock = u +} + +func (h *InvertedIndexHeader) SetNumberOfIndexBlocks(u uint64) { + h.NumberOfIndexBlocks = u +} + +func (h *InvertedIndexHeader) SetNetBlockSize(u uint64) { + h.NetBlockSize = u +} + +func (h *InvertedIndexHeader) SetValuesStartPoint(u uint64) { + h.ValuesStartPoint = u +} diff --git a/scdb/internal/entries/headers/inverted_index_header_test.go b/scdb/internal/entries/headers/inverted_index_header_test.go new file mode 100644 index 0000000..cb8057e --- /dev/null +++ b/scdb/internal/entries/headers/inverted_index_header_test.go @@ -0,0 +1,546 @@ +package headers + +import ( + "fmt" + "github.com/sopherapps/go-scdb/scdb/errors" + "github.com/sopherapps/go-scdb/scdb/internal" + "github.com/stretchr/testify/assert" + "math" + "os" + "testing" +) + +func TestNewInvertedIndexHeader(t *testing.T) { + blockSize := uint32(os.Getpagesize()) + type testRecord struct { + maxKeys *uint64 + redundantBlocks *uint16 + maxIndexKeyLen *uint32 + expected *InvertedIndexHeader + } + var testMaxKeys uint64 = 24_000_000 + var testRedundantBlocks uint16 = 5 + var testMaxIndexKeyLen uint32 = 4 + expectedDefaultMaxKeys := DefaultMaxKeys * uint64(DefaultMaxIndexKeyLen) + testTable := []testRecord{ + {nil, nil, nil, generateInvertedIndexHeader(expectedDefaultMaxKeys, DefaultRedundantBlocks, blockSize, DefaultMaxIndexKeyLen)}, + {&testMaxKeys, nil, nil, generateInvertedIndexHeader(testMaxKeys, DefaultRedundantBlocks, blockSize, DefaultMaxIndexKeyLen)}, + {nil, &testRedundantBlocks, nil, generateInvertedIndexHeader(expectedDefaultMaxKeys, testRedundantBlocks, blockSize, 
DefaultMaxIndexKeyLen)}, + {nil, nil, &testMaxIndexKeyLen, generateInvertedIndexHeader(DefaultMaxKeys*uint64(testMaxIndexKeyLen), DefaultRedundantBlocks, blockSize, testMaxIndexKeyLen)}, + {&testMaxKeys, &testRedundantBlocks, &testMaxIndexKeyLen, generateInvertedIndexHeader(testMaxKeys, testRedundantBlocks, blockSize, testMaxIndexKeyLen)}, + } + + for _, record := range testTable { + got := NewInvertedIndexHeader(record.maxKeys, record.redundantBlocks, &blockSize, record.maxIndexKeyLen) + assert.Equal(t, record.expected, got) + } +} + +func TestExtractInvertedIndexHeaderFromByteArray(t *testing.T) { + blockSize := uint32(os.Getpagesize()) + blockSizeAsBytes := internal.Uint32ToByteArray(blockSize) + // title: ScdbIndex v0.001 + titleBytes := []byte{ + 83, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49, + } + reserveBytes := make([]byte, 66) + + t.Run("ExtractInvertedIndexHeaderFromByteArrayDoesJustThat", func(t *testing.T) { + type testRecord struct { + data []byte + expected *InvertedIndexHeader + } + + testData := []testRecord{ + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(DefaultMaxKeys, DefaultRedundantBlocks, blockSize, 3), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 9 */ + []byte{0, 0, 0, 9}, + reserveBytes), + expected: generateInvertedIndexHeader(24_000_000, DefaultRedundantBlocks, blockSize, 9), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 9 */ + []byte{0, 9}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(DefaultMaxKeys, 9, blockSize, 3), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 5 */ + []byte{0, 5}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(24_000_000, 5, blockSize, 3), + }, + } + + for _, record := range testData { + got, err := ExtractInvertedIndexHeaderFromByteArray(record.data) + if err != nil { + t.Fatalf("error extracting header from byte array: %s", err) + } + + assert.Equal(t, record.expected, got) + } + }) + + t.Run("ExtractInvertedIndexHeaderFromByteArrayRaisesEErrOutOfBoundsWhenArrayIsTooShort", func(t *testing.T) { + type testRecord struct { + data []byte + expected *errors.ErrOutOfBounds + } + + testData := []testRecord{ + { + internal.ConcatByteArrays( + // title truncated + titleBytes[2:], + blockSizeAsBytes, + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + []byte{0, 1}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 98. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + // block size truncated + blockSizeAsBytes[:3], + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 1}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. 
expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys truncated */ + []byte{0, 15, 66, 64}, + []byte{0, 9}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 96. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks truncated */ + []byte{5}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 5}, + /* maxIndexKeyLen truncated */ + []byte{0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 5}, + []byte{0, 0, 0, 3}, + // reserve bytes truncated + reserveBytes[:60]), + errors.NewErrOutOfBounds("header length is 94. expected 100"), + }, + } + + for _, record := range testData { + _, err := ExtractInvertedIndexHeaderFromByteArray(record.data) + assert.Equal(t, record.expected, err) + } + }) +} + +func TestExtractInvertedIndexHeaderFromFile(t *testing.T) { + filePath := "testdb.scdb" + blockSize := uint32(os.Getpagesize()) + blockSizeAsBytes := internal.Uint32ToByteArray(blockSize) + // title: ScdbIndex v0.001 + titleBytes := []byte{ + 83, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49, + } + reserveBytes := make([]byte, 66) + + t.Run("ExtractInvertedIndexHeaderFromFileDoesJustThat", func(t *testing.T) { + defer func() { + _ = os.Remove(filePath) + }() + type testRecord struct { + data []byte + expected *InvertedIndexHeader + } + + testData := []testRecord{ + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(DefaultMaxKeys, DefaultRedundantBlocks, blockSize, 3), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(24_000_000, DefaultRedundantBlocks, blockSize, 3), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 9 */ + []byte{0, 9}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + expected: generateInvertedIndexHeader(DefaultMaxKeys, 9, blockSize, 3), + }, + { + data: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 5 */ + []byte{0, 5}, + /* max_index_key_len 8 */ + []byte{0, 0, 0, 8}, + reserveBytes), + expected: generateInvertedIndexHeader(24_000_000, 5, blockSize, 8), + }, + } + + for _, record := range testData { + file, err := internal.GenerateFileWithTestData(filePath, record.data) + if err != nil { + t.Fatalf("error generating file with data: %s", err) + } + + got, err := ExtractInvertedIndexHeaderFromFile(file) + if err != nil { + t.Fatalf("error extracting header from file: %s", err) + } + _ = file.Close() + + assert.Equal(t, record.expected, got) + } + }) + + 
t.Run("ExtractInvertedIndexHeaderFromFileRaisesEErrOutOfBoundsWhenFileContentIsTooShort", func(t *testing.T) { + defer func() { + _ = os.Remove(filePath) + }() + type testRecord struct { + data []byte + expected *errors.ErrOutOfBounds + } + + testData := []testRecord{ + { + internal.ConcatByteArrays( + // title truncated + titleBytes[2:], + blockSizeAsBytes, + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + []byte{0, 1}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 98. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + // block size truncated + blockSizeAsBytes[:3], + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 1}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys truncated */ + []byte{0, 15, 66, 64}, + []byte{0, 9}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 96. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks truncated */ + []byte{5}, + []byte{0, 0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 5}, + /* maxIndexKeyLen truncated */ + []byte{0, 0, 3}, + reserveBytes), + errors.NewErrOutOfBounds("header length is 99. expected 100"), + }, + { + internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + []byte{0, 5}, + []byte{0, 0, 0, 3}, + // reserve bytes truncated + reserveBytes[:60]), + errors.NewErrOutOfBounds("header length is 94. 
expected 100"), + }, + } + + for _, record := range testData { + file, err := internal.GenerateFileWithTestData(filePath, record.data) + if err != nil { + t.Fatalf("error generating file with data: %s", err) + } + + _, err = ExtractInvertedIndexHeaderFromFile(file) + _ = file.Close() + assert.Equal(t, record.expected, err) + + err = os.Remove(filePath) + if err != nil { + t.Fatalf("error deleting db file: %s", err) + } + } + }) + +} + +func TestInvertedIndexHeader_AsBytes(t *testing.T) { + blockSize := uint32(os.Getpagesize()) + blockSizeAsBytes := internal.Uint32ToByteArray(blockSize) + // title: ScdbIndex v0.001 + titleBytes := []byte{ + 83, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49, + } + reserveBytes := make([]byte, 66) + type testRecord struct { + expected []byte + header *InvertedIndexHeader + } + + testMaxIndexKeyLen := uint32(9) + + testData := []testRecord{ + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + header: generateInvertedIndexHeader(DefaultMaxKeys, DefaultRedundantBlocks, blockSize, 3), + }, + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + header: generateInvertedIndexHeader(24_000_000, DefaultRedundantBlocks, blockSize, 3), + }, + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys DefaultMaxKeys */ + []byte{0, 0, 0, 0, 0, 15, 66, 64}, + /* redundant_blocks 9 */ + []byte{0, 9}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + header: generateInvertedIndexHeader(DefaultMaxKeys, 9, blockSize, 3), + }, + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 24_000_000 */ + []byte{0, 0, 0, 0, 1, 110, 54, 0}, + /* redundant_blocks 5 */ + []byte{0, 5}, + /* max_index_key_len 9 */ + []byte{0, 0, 0, 9}, + reserveBytes), + header: generateInvertedIndexHeader(24_000_000, 5, blockSize, 9), + }, + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 3_000_000 i.e. maxIndexKeyLen * DefaultMaxKeys*/ + []byte{0, 0, 0, 0, 0, 45, 198, 192}, + /* redundant_blocks 1 */ + []byte{0, 1}, + /* max_index_key_len 3 */ + []byte{0, 0, 0, 3}, + reserveBytes), + header: NewInvertedIndexHeader(nil, nil, nil, nil), + }, + { + expected: internal.ConcatByteArrays( + titleBytes, + blockSizeAsBytes, + /* max_keys 9_000_000 i.e. 
maxIndexKeyLen * DefaultMaxKeys*/
+ []byte{0, 0, 0, 0, 0, 137, 84, 64},
+ /* redundant_blocks 1 */
+ []byte{0, 1},
+ /* max_index_key_len 9 */
+ []byte{0, 0, 0, 9},
+ reserveBytes),
+ header: NewInvertedIndexHeader(nil, nil, nil, &testMaxIndexKeyLen),
+ },
+ }
+
+ for _, record := range testData {
+ got := record.header.AsBytes()
+ assert.Equal(t, record.expected, got)
+ }
+}
+
+func TestInvertedIndexHeader_GetIndexOffset(t *testing.T) {
+ dbHeader := NewInvertedIndexHeader(nil, nil, nil, nil)
+ offset := GetIndexOffset(dbHeader, []byte("foo"))
+ block1Start := HeaderSizeInBytes
+ block1End := dbHeader.NetBlockSize + block1Start
+ assert.LessOrEqual(t, block1Start, offset)
+ assert.Less(t, offset, block1End)
+}
+
+func TestInvertedIndexHeader_GetIndexOffsetInNthBlock(t *testing.T) {
+ dbHeader := NewInvertedIndexHeader(nil, nil, nil, nil)
+ initialOffset := GetIndexOffset(dbHeader, []byte("foo"))
+ numberOfBlocks := dbHeader.NumberOfIndexBlocks
+
+ t.Run("GetIndexOffsetInNthBlockWorksAsExpected", func(t *testing.T) {
+ for i := uint64(0); i < numberOfBlocks; i++ {
+ blockStart := HeaderSizeInBytes + (i * dbHeader.NetBlockSize)
+ blockEnd := dbHeader.NetBlockSize + blockStart
+ offset, err := GetIndexOffsetInNthBlock(dbHeader, initialOffset, i)
+ if err != nil {
+ t.Fatalf("error getting index offset in nth block: %s", err)
+ }
+ assert.LessOrEqual(t, blockStart, offset)
+ assert.Less(t, offset, blockEnd)
+ }
+ })
+
+ t.Run("GetIndexOffsetReturnsErrOutOfBoundsIfNIsBeyondNumberOfIndexBlocksInHeader", func(t *testing.T) {
+ for i := numberOfBlocks; i < numberOfBlocks+2; i++ {
+ _, err := GetIndexOffsetInNthBlock(dbHeader, initialOffset, i)
+ expectedError := errors.NewErrOutOfBounds(fmt.Sprintf("n %d is out of bounds", i))
+ assert.Equal(t, expectedError, err)
+ }
+ })
+
+}
+
+// generateInvertedIndexHeader generates an InvertedIndexHeader based on the inputs supplied.
+// This is just a helper for tests
+func generateInvertedIndexHeader(maxKeys uint64, redundantBlocks uint16, blockSize uint32, maxIndexKeyLen uint32) *InvertedIndexHeader {
+ itemsPerIndexBlock := uint64(math.Floor(float64(blockSize) / float64(IndexEntrySizeInBytes)))
+ netBlockSize := itemsPerIndexBlock * IndexEntrySizeInBytes
+ numberOfIndexBlocks := uint64(math.Ceil(float64(maxKeys)/float64(itemsPerIndexBlock))) + uint64(redundantBlocks)
+ valuesStartPoint := HeaderSizeInBytes + (netBlockSize * numberOfIndexBlocks)
+
+ return &InvertedIndexHeader{
+ Title: []byte("ScdbIndex v0.001"),
+ BlockSize: blockSize,
+ MaxKeys: maxKeys,
+ RedundantBlocks: redundantBlocks,
+ ItemsPerIndexBlock: itemsPerIndexBlock,
+ NumberOfIndexBlocks: numberOfIndexBlocks,
+ ValuesStartPoint: valuesStartPoint,
+ NetBlockSize: netBlockSize,
+ MaxIndexKeyLen: maxIndexKeyLen,
+ }
+}
diff --git a/scdb/internal/entries/headers/shared.go b/scdb/internal/entries/headers/shared.go
new file mode 100644
index 0000000..64017f2
--- /dev/null
+++ b/scdb/internal/entries/headers/shared.go
@@ -0,0 +1,125 @@
+package headers
+
+import (
+ "fmt"
+ "github.com/sopherapps/go-scdb/scdb/errors"
+ "github.com/sopherapps/go-scdb/scdb/internal"
+ "math"
+ "os"
+)
+
+const IndexEntrySizeInBytes uint64 = 8
+const HeaderSizeInBytes uint64 = 100
+const DefaultMaxKeys uint64 = 1_000_000
+const DefaultRedundantBlocks uint16 = 1
+
+type Header interface {
+ // GetItemsPerIndexBlock gets the number of items per index block
+ GetItemsPerIndexBlock() uint64
+
+ // GetNumberOfIndexBlocks gets the number of index blocks for the given header
+ GetNumberOfIndexBlocks() uint64
+
+ // GetNetBlockSize gets the net size of each index block
+ GetNetBlockSize() uint64
+
+ // GetBlockSize gets the raw block size used for the file
+ GetBlockSize() uint32
+
+ // GetMaxKeys gets the maximum number of keys permitted for the given file-based map
+ GetMaxKeys() uint64
+
+ // GetRedundantBlocks gets the redundant blocks to add to the index blocks to reduce hash collisions
+ // as the file of the file-based map gets full
+ GetRedundantBlocks() uint16
+
+ // AsBytes retrieves the byte array that represents the header.
+ AsBytes() []byte
+
+ // SetItemsPerIndexBlock sets the number of items per index block of the header
+ SetItemsPerIndexBlock(u uint64)
+
+ // SetNumberOfIndexBlocks sets the number of index blocks
+ SetNumberOfIndexBlocks(u uint64)
+
+ // SetNetBlockSize sets the net block size
+ SetNetBlockSize(u uint64)
+
+ // SetValuesStartPoint sets the values (or key-values) starting address in the file
+ SetValuesStartPoint(u uint64)
+}
+
+// readHeaderFile reads the contents of a header file and returns
+// the bytes therein
+//
+// The data read can be used to construct a Header instance,
+// for instance via ExtractDbFileHeaderFromByteArray
+func readHeaderFile(file *os.File) ([]byte, error) {
+ buf := make([]byte, HeaderSizeInBytes)
+ n, err := file.ReadAt(buf, 0)
+ if n < int(HeaderSizeInBytes) {
+ return nil, errors.NewErrOutOfBounds(fmt.Sprintf("header length is %d. expected %d", n, HeaderSizeInBytes))
+ }
+
+ if err != nil {
+ return nil, err
+ }
+
+ return buf, nil
+}
+
+// GetIndexOffset computes the offset for the given key in the first index block.
+// It uses the metadata in the header,
+// i.e.
number of items per block and the `IndexEntrySizeInBytes`
+func GetIndexOffset(h Header, key []byte) uint64 {
+ hash := internal.GetHash(key, h.GetItemsPerIndexBlock())
+ return HeaderSizeInBytes + (hash * IndexEntrySizeInBytes)
+}
+
+// GetIndexOffsetInNthBlock returns the index offset for the nth index block if `initialOffset` is the offset
+// in the topmost index block. `n` starts at zero, where zero is the topmost index block
+func GetIndexOffsetInNthBlock(h Header, initialOffset uint64, n uint64) (uint64, error) {
+ if n >= h.GetNumberOfIndexBlocks() {
+ return 0, errors.NewErrOutOfBounds(fmt.Sprintf("n %d is out of bounds", n))
+ }
+ num := initialOffset + (h.GetNetBlockSize() * n)
+ return num, nil
+}
+
+// InitializeFile initializes the database/index file, giving it the header and the index placeholders
+// and truncating it.
+//
+// It returns the new file size
+func InitializeFile(file *os.File, header Header) (int64, error) {
+ headerBytes := header.AsBytes()
+ headerLength := int64(len(headerBytes))
+ finalSize := headerLength + int64(header.GetNumberOfIndexBlocks()*header.GetNetBlockSize())
+
+ // First shrink file to zero, to delete all data
+ err := file.Truncate(0)
+ if err != nil {
+ return 0, err
+ }
+
+ // Then expand the file again
+ err = file.Truncate(finalSize)
+ if err != nil {
+ return 0, err
+ }
+
+ _, err = file.WriteAt(headerBytes, 0)
+ if err != nil {
+ return 0, err
+ }
+
+ return finalSize, nil
+}
+
+// updateDerivedProps computes the properties that depend on the user-defined/default properties and updates them
+// on the header
+func updateDerivedProps(h Header) {
+ h.SetItemsPerIndexBlock(uint64(math.Floor(float64(h.GetBlockSize()) / float64(IndexEntrySizeInBytes))))
+ h.SetNumberOfIndexBlocks(uint64(math.Ceil(float64(h.GetMaxKeys())/float64(h.GetItemsPerIndexBlock()))) + uint64(h.GetRedundantBlocks()))
+ h.SetNetBlockSize(h.GetItemsPerIndexBlock() * IndexEntrySizeInBytes)
+ h.SetValuesStartPoint(HeaderSizeInBytes + (h.GetNetBlockSize() * h.GetNumberOfIndexBlocks()))
+}
diff --git a/scdb/store.go b/scdb/store.go
index 15cf213..2588def 100644
--- a/scdb/store.go
+++ b/scdb/store.go
@@ -121,10 +121,10 @@ func (s *Store) Set(k []byte, v []byte, ttl *uint64) error {
 expiry = uint64(time.Now().Unix()) + *ttl
 }
 
- initialIdxOffset := s.header.GetIndexOffset(k)
+ initialIdxOffset := headers.GetIndexOffset(s.header, k)
 
 for idxBlock := uint64(0); idxBlock < s.header.NumberOfIndexBlocks; idxBlock++ {
- indexOffset, err := s.header.GetIndexOffsetInNthBlock(initialIdxOffset, idxBlock)
+ indexOffset, err := headers.GetIndexOffsetInNthBlock(s.header, initialIdxOffset, idxBlock)
 if err != nil {
 return err
 }
@@ -176,10 +176,10 @@ func (s *Store) Get(k []byte) ([]byte, error) {
 s.mu.Lock()
 defer s.mu.Unlock()
 
- initialIdxOffset := s.header.GetIndexOffset(k)
+ initialIdxOffset := headers.GetIndexOffset(s.header, k)
 
 for idxBlock := uint64(0); idxBlock < s.header.NumberOfIndexBlocks; idxBlock++ {
- indexOffset, err := s.header.GetIndexOffsetInNthBlock(initialIdxOffset, idxBlock)
+ indexOffset, err := headers.GetIndexOffsetInNthBlock(s.header, initialIdxOffset, idxBlock)
 if err != nil {
 return nil, err
 }
@@ -244,10 +244,10 @@ func (s *Store) Delete(k []byte) error {
 ch <- s.searchIndex.Remove(k)
 }(done)
 
- initialIdxOffset := s.header.GetIndexOffset(k)
+ initialIdxOffset := headers.GetIndexOffset(s.header, k)
 
 for idxBlock := uint64(0); idxBlock < s.header.NumberOfIndexBlocks; idxBlock++ {
- indexOffset, err := s.header.GetIndexOffsetInNthBlock(initialIdxOffset, idxBlock)
+
indexOffset, err := headers.GetIndexOffsetInNthBlock(s.header, initialIdxOffset, idxBlock) if err != nil { return err } From 12725f552fa1e2fe2a754d92896866123b684189 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 03:19:13 +0300 Subject: [PATCH 03/15] Fix test for BufferPool.GetManyKeyValues --- scdb/internal/buffers/pool.go | 2 +- scdb/internal/buffers/pool_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index 4b5320b..c6079df 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -504,7 +504,7 @@ func (bp *BufferPool) GetManyKeyValues(addrs []uint64) ([]KeyValuePair, error) { return nil, err } - if !entry.IsDeleted && entry.IsExpired() { + if !entry.IsDeleted && !entry.IsExpired() { results = append(results, KeyValuePair{ K: entry.Key, V: entry.Value, diff --git a/scdb/internal/buffers/pool_test.go b/scdb/internal/buffers/pool_test.go index 451aa15..bd63e35 100644 --- a/scdb/internal/buffers/pool_test.go +++ b/scdb/internal/buffers/pool_test.go @@ -662,7 +662,7 @@ func TestBufferPool_GetManyKeyValues(t *testing.T) { t.Fatalf("error creating the db file header: %s", err) } - addresses := make([]uint64, len(table)) + addresses := make([]uint64, 0, len(table)) for _, pair := range table { kv := values.NewKeyValueEntry(pair.K, pair.V, 0) insertKeyValueEntry(t, pool, header, kv) @@ -703,7 +703,7 @@ func TestBufferPool_GetManyKeyValues(t *testing.T) { t.Fatalf("error creating the db file header: %s", err) } - addresses := make([]uint64, len(nonExpired)+len(expired)) + addresses := make([]uint64, 0, len(nonExpired)+len(expired)) for _, pair := range nonExpired { kv := values.NewKeyValueEntry(pair.K, pair.V, 0) insertKeyValueEntry(t, pool, header, kv) @@ -752,7 +752,7 @@ func TestBufferPool_GetManyKeyValues(t *testing.T) { t.Fatalf("error creating the db file header: %s", err) } - addresses := make([]uint64, len(unDeleted)+len(deleted)) + addresses := make([]uint64, 0, len(unDeleted)+len(deleted)) for _, pair := range unDeleted { kv := values.NewKeyValueEntry(pair.K, pair.V, 0) insertKeyValueEntry(t, pool, header, kv) From 2ebc729afb92580dea70db805ae36bab372ff765 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 04:08:56 +0300 Subject: [PATCH 04/15] Add InvertedIndexEntry --- scdb/internal/buffers/buffer.go | 2 +- scdb/internal/buffers/pool.go | 6 +- .../entries/values/inverted_index_entry.go | 160 ++++++++++++++++++ .../values/inverted_index_entry_test.go | 67 ++++++++ scdb/internal/entries/values/key_value.go | 11 +- .../internal/entries/values/key_value_test.go | 6 +- scdb/internal/entries/values/shared.go | 22 +++ 7 files changed, 258 insertions(+), 16 deletions(-) create mode 100644 scdb/internal/entries/values/inverted_index_entry.go create mode 100644 scdb/internal/entries/values/inverted_index_entry_test.go create mode 100644 scdb/internal/entries/values/shared.go diff --git a/scdb/internal/buffers/buffer.go b/scdb/internal/buffers/buffer.go index 2619205..8560f70 100644 --- a/scdb/internal/buffers/buffer.go +++ b/scdb/internal/buffers/buffer.go @@ -83,7 +83,7 @@ func (b *Buffer) GetValue(addr uint64, key []byte) (*values.KeyValueEntry, error return nil, err } - if bytes.Equal(entry.Key, key) && !entry.IsDeleted && !entry.IsExpired() { + if bytes.Equal(entry.Key, key) && !entry.IsDeleted && !values.IsExpired(entry) { return entry, nil } return nil, nil diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index 
c6079df..9d115ec 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -257,7 +257,7 @@ func (bp *BufferPool) CompactFile(searchIndex *internal.InvertedIndex) error { return e } - if !kv.IsExpired() && !kv.IsDeleted { + if !values.IsExpired(kv) && !kv.IsDeleted { kvSize := int64(len(kvByteArray)) // insert key value at the bottom of the new file _, er := newFile.WriteAt(kvByteArray, newFileOffset) @@ -346,7 +346,7 @@ func (bp *BufferPool) GetValue(kvAddress uint64, key []byte) (*values.KeyValueEn return nil, err } - if bytes.Equal(entry.Key, key) && !entry.IsExpired() && !entry.IsDeleted { + if bytes.Equal(entry.Key, key) && !values.IsExpired(entry) && !entry.IsDeleted { return entry, nil } @@ -504,7 +504,7 @@ func (bp *BufferPool) GetManyKeyValues(addrs []uint64) ([]KeyValuePair, error) { return nil, err } - if !entry.IsDeleted && !entry.IsExpired() { + if !entry.IsDeleted && !values.IsExpired(entry) { results = append(results, KeyValuePair{ K: entry.Key, V: entry.Value, diff --git a/scdb/internal/entries/values/inverted_index_entry.go b/scdb/internal/entries/values/inverted_index_entry.go new file mode 100644 index 0000000..9ea1f15 --- /dev/null +++ b/scdb/internal/entries/values/inverted_index_entry.go @@ -0,0 +1,160 @@ +package values + +import "github.com/sopherapps/go-scdb/scdb/internal" + +const InvertedIndexEntryMinSizeInBytes uint32 = 4 + 4 + 1 + 1 + 8 + 8 + 8 + 8 + +type InvertedIndexEntry struct { + Size uint32 + IndexKeySize uint32 + IndexKey []byte + Key []byte + IsDeleted bool + IsRoot bool + Expiry uint64 + NextOffset uint64 + PreviousOffset uint64 + KvAddress uint64 +} + +// NewInvertedIndexEntry creates a new InvertedIndexEntry +func NewInvertedIndexEntry(indexKey []byte, key []byte, expiry uint64, isRoot bool, kvAddr uint64, nextOffset uint64, previousOffset uint64) *InvertedIndexEntry { + keySize := uint32(len(key)) + indexKeySize := uint32(len(indexKey)) + size := keySize + indexKeySize + InvertedIndexEntryMinSizeInBytes + + return &InvertedIndexEntry{ + Size: size, + IndexKeySize: indexKeySize, + IndexKey: indexKey, + Key: key, + IsDeleted: false, + IsRoot: isRoot, + Expiry: expiry, + NextOffset: nextOffset, + PreviousOffset: previousOffset, + KvAddress: kvAddr, + } +} + +// ExtractInvertedIndexEntryFromByteArray extracts the key value entry from the data byte array +func ExtractInvertedIndexEntryFromByteArray(data []byte, offset uint64) (*InvertedIndexEntry, error) { + dataLength := uint64(len(data)) + sizeSlice, err := internal.SafeSlice(data, offset, offset+4, dataLength) + if err != nil { + return nil, err + } + size, err := internal.Uint32FromByteArray(sizeSlice) + if err != nil { + return nil, err + } + + indexKeySizeSlice, err := internal.SafeSlice(data, offset+4, offset+8, dataLength) + if err != nil { + return nil, err + } + indexKeySize, err := internal.Uint32FromByteArray(indexKeySizeSlice) + if err != nil { + return nil, err + } + + indexKeySizeU64 := uint64(indexKeySize) + indexKey, err := internal.SafeSlice(data, offset+8, offset+8+indexKeySizeU64, dataLength) + if err != nil { + return nil, err + } + + keySizeU64 := uint64(size - indexKeySize - InvertedIndexEntryMinSizeInBytes) + key, err := internal.SafeSlice(data, offset+8+indexKeySizeU64, offset+8+indexKeySizeU64+keySizeU64, dataLength) + if err != nil { + return nil, err + } + + isDeletedSlice, err := internal.SafeSlice(data, offset+8+indexKeySizeU64+keySizeU64, offset+9+indexKeySizeU64+keySizeU64, dataLength) + if err != nil { + return nil, err + } + isDeleted, err := 
internal.BoolFromByteArray(isDeletedSlice) + if err != nil { + return nil, err + } + + isRootSlice, err := internal.SafeSlice(data, offset+9+indexKeySizeU64+keySizeU64, offset+10+indexKeySizeU64+keySizeU64, dataLength) + if err != nil { + return nil, err + } + isRoot, err := internal.BoolFromByteArray(isRootSlice) + if err != nil { + return nil, err + } + + expirySlice, err := internal.SafeSlice(data, offset+10+indexKeySizeU64+keySizeU64, offset+indexKeySizeU64+keySizeU64+18, dataLength) + if err != nil { + return nil, err + } + expiry, err := internal.Uint64FromByteArray(expirySlice) + if err != nil { + return nil, err + } + + nextOffsetSlice, err := internal.SafeSlice(data, offset+18+indexKeySizeU64+keySizeU64, offset+indexKeySizeU64+keySizeU64+26, dataLength) + if err != nil { + return nil, err + } + nextOffset, err := internal.Uint64FromByteArray(nextOffsetSlice) + if err != nil { + return nil, err + } + + prevOffsetSlice, err := internal.SafeSlice(data, offset+26+indexKeySizeU64+keySizeU64, offset+indexKeySizeU64+keySizeU64+34, dataLength) + if err != nil { + return nil, err + } + prevOffset, err := internal.Uint64FromByteArray(prevOffsetSlice) + if err != nil { + return nil, err + } + + kvAddrSlice, err := internal.SafeSlice(data, offset+34+indexKeySizeU64+keySizeU64, offset+indexKeySizeU64+keySizeU64+42, dataLength) + if err != nil { + return nil, err + } + kvAddr, err := internal.Uint64FromByteArray(kvAddrSlice) + if err != nil { + return nil, err + } + + entry := InvertedIndexEntry{ + Size: size, + IndexKeySize: indexKeySize, + IndexKey: indexKey, + Key: key, + IsDeleted: isDeleted, + IsRoot: isRoot, + Expiry: expiry, + NextOffset: nextOffset, + PreviousOffset: prevOffset, + KvAddress: kvAddr, + } + + return &entry, nil +} + +func (ide *InvertedIndexEntry) GetExpiry() uint64 { + return ide.Expiry +} + +func (ide *InvertedIndexEntry) AsBytes() []byte { + return internal.ConcatByteArrays( + internal.Uint32ToByteArray(ide.Size), + internal.Uint32ToByteArray(ide.IndexKeySize), + ide.IndexKey, + ide.Key, + internal.BoolToByteArray(ide.IsDeleted), + internal.BoolToByteArray(ide.IsRoot), + internal.Uint64ToByteArray(ide.Expiry), + internal.Uint64ToByteArray(ide.NextOffset), + internal.Uint64ToByteArray(ide.PreviousOffset), + internal.Uint64ToByteArray(ide.KvAddress), + ) +} diff --git a/scdb/internal/entries/values/inverted_index_entry_test.go b/scdb/internal/entries/values/inverted_index_entry_test.go new file mode 100644 index 0000000..b0b02e7 --- /dev/null +++ b/scdb/internal/entries/values/inverted_index_entry_test.go @@ -0,0 +1,67 @@ +package values + +import ( + "fmt" + "github.com/sopherapps/go-scdb/scdb/errors" + "github.com/sopherapps/go-scdb/scdb/internal" + "github.com/stretchr/testify/assert" + "testing" + "time" +) + +var valuesByteArray = []byte{ + /* size: 47u32*/ 0, 0, 0, 47, + /* index key size: 2u32*/ 0, 0, 0, 2, + /* key: fo */ 102, 111, + /* key: foo */ 102, 111, 111, + /* is_deleted */ 0, + /* is_root */ 0, + /* expiry 0u64 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* next offset 900u64 */ 0, 0, 0, 0, 0, 0, 3, 132, + /* previous offset 90u64 */ 0, 0, 0, 0, 0, 0, 0, 90, + /* kv_address: 100u64 */ 0, 0, 0, 0, 0, 0, 0, 100, +} + +func TestExtractInvertedIndexEntryFromByteArray(t *testing.T) { + entry := NewInvertedIndexEntry([]byte("fo"), []byte("foo"), 0, false, 100, 900, 90) + + t.Run("ExtractInvertedIndexEntryFromByteArrayWorksAsExpected", func(t *testing.T) { + got, err := ExtractInvertedIndexEntryFromByteArray(valuesByteArray, 0) + if err != nil { + t.Fatalf("error extracting 
key value from byte array: %s", err) + } + assert.Equal(t, entry, got) + }) + + t.Run("ExtractInvertedIndexEntryFromByteArrayWithOffsetWorksAsExpected", func(t *testing.T) { + dataArray := internal.ConcatByteArrays([]byte{89, 78}, valuesByteArray) + got, err := ExtractInvertedIndexEntryFromByteArray(dataArray, 2) + if err != nil { + t.Fatalf("error extracting key value from byte array: %s", err) + } + assert.Equal(t, entry, got) + }) + + t.Run("ExtractInvertedIndexEntryFromByteArrayWithOutOfBoundsOffsetReturnsErrOutOfBounds", func(t *testing.T) { + dataArray := internal.ConcatByteArrays([]byte{89, 78}, valuesByteArray) + _, err := ExtractInvertedIndexEntryFromByteArray(dataArray, 4) + expectedError := errors.NewErrOutOfBounds(fmt.Sprintf("slice %d - %d out of bounds for maxLength %d for data %v", 12, 157307, len(dataArray), dataArray)) + assert.Equal(t, expectedError, err) + }) +} + +func TestInvertedIndexEntry_AsBytes(t *testing.T) { + entry := NewInvertedIndexEntry([]byte("fo"), []byte("foo"), 0, false, 100, 900, 90) + assert.Equal(t, valuesByteArray, entry.AsBytes()) +} + +func TestInvertedIndexEntry_IsExpired(t *testing.T) { + neverExpires := NewInvertedIndexEntry([]byte("ne"), []byte("never_expires"), 0, false, 100, 900, 90) + // 1666023836 is some past timestamp in October 2022 + expired := NewInvertedIndexEntry([]byte("exp"), []byte("expires"), 1666023836, false, 100, 900, 90) + notExpired := NewInvertedIndexEntry([]byte("no"), []byte("not_expired"), uint64(time.Now().Unix())*2, false, 100, 900, 90) + + assert.False(t, IsExpired(neverExpires)) + assert.False(t, IsExpired(notExpired)) + assert.True(t, IsExpired(expired)) +} diff --git a/scdb/internal/entries/values/key_value.go b/scdb/internal/entries/values/key_value.go index 6cacd1f..a230881 100644 --- a/scdb/internal/entries/values/key_value.go +++ b/scdb/internal/entries/values/key_value.go @@ -2,7 +2,6 @@ package values import ( "github.com/sopherapps/go-scdb/scdb/internal" - "time" ) const KeyValueMinSizeInBytes uint32 = 4 + 4 + 8 + 1 @@ -110,12 +109,6 @@ func (kv *KeyValueEntry) AsBytes() []byte { ) } -// IsExpired returns true if key has lived for longer than its time-to-live -// It will always return false if time-to-live was never set -func (kv *KeyValueEntry) IsExpired() bool { - if kv.Expiry == 0 { - return false - } else { - return kv.Expiry < uint64(time.Now().Unix()) - } +func (kv *KeyValueEntry) GetExpiry() uint64 { + return kv.Expiry } diff --git a/scdb/internal/entries/values/key_value_test.go b/scdb/internal/entries/values/key_value_test.go index 23f4653..357465c 100644 --- a/scdb/internal/entries/values/key_value_test.go +++ b/scdb/internal/entries/values/key_value_test.go @@ -57,7 +57,7 @@ func TestKeyValueEntry_IsExpired(t *testing.T) { expired := NewKeyValueEntry([]byte("expires"), []byte("bar"), 1666023836) notExpired := NewKeyValueEntry([]byte("not_expired"), []byte("bar"), uint64(time.Now().Unix())*2) - assert.False(t, neverExpires.IsExpired()) - assert.False(t, notExpired.IsExpired()) - assert.True(t, expired.IsExpired()) + assert.False(t, IsExpired(neverExpires)) + assert.False(t, IsExpired(notExpired)) + assert.True(t, IsExpired(expired)) } diff --git a/scdb/internal/entries/values/shared.go b/scdb/internal/entries/values/shared.go new file mode 100644 index 0000000..967b496 --- /dev/null +++ b/scdb/internal/entries/values/shared.go @@ -0,0 +1,22 @@ +package values + +import "time" + +type ValueEntry interface { + // GetExpiry gets the expiry of the value entry + GetExpiry() uint64 + + // AsBytes 
retrieves the byte array that represents the value entry. + AsBytes() []byte +} + +// IsExpired returns true if key has lived for longer than its time-to-live +// It will always return false if time-to-live was never set +func IsExpired(v ValueEntry) bool { + expiry := v.GetExpiry() + if expiry == 0 { + return false + } else { + return expiry < uint64(time.Now().Unix()) + } +} From 78ed3924cd4b5105acf0eceb59e8f7c6c1814e00 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 14:45:10 +0300 Subject: [PATCH 05/15] WIP: Add tests InvertedIndex --- scdb/internal/buffers/pool.go | 3 +- scdb/internal/buffers/pool_test.go | 3 +- .../{ => inverted_index}/inverted_index.go | 26 +- .../inverted_index/inverted_index_test.go | 404 ++++++++++++++++++ scdb/internal/inverted_index_test.go | 1 - scdb/store.go | 5 +- 6 files changed, 433 insertions(+), 9 deletions(-) rename scdb/internal/{ => inverted_index}/inverted_index.go (72%) create mode 100644 scdb/internal/inverted_index/inverted_index_test.go delete mode 100644 scdb/internal/inverted_index_test.go diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index 9d115ec..dfbb1cd 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -7,6 +7,7 @@ import ( "github.com/sopherapps/go-scdb/scdb/internal" "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" "github.com/sopherapps/go-scdb/scdb/internal/entries/values" + "github.com/sopherapps/go-scdb/scdb/internal/inverted_index" "io" "math" "os" @@ -200,7 +201,7 @@ func (bp *BufferPool) ClearFile() error { // CompactFile removes any deleted or expired entries from the file. It must first lock the buffer and the file. // In order to be more efficient, it creates a new file, copying only that data which is not deleted or expired -func (bp *BufferPool) CompactFile(searchIndex *internal.InvertedIndex) error { +func (bp *BufferPool) CompactFile(searchIndex *inverted_index.InvertedIndex) error { folder := filepath.Dir(bp.FilePath) newFilePath := filepath.Join(folder, "tmp__compact.scdb") newFile, err := os.OpenFile(newFilePath, os.O_RDWR|os.O_CREATE, 0666) diff --git a/scdb/internal/buffers/pool_test.go b/scdb/internal/buffers/pool_test.go index bd63e35..8253bf8 100644 --- a/scdb/internal/buffers/pool_test.go +++ b/scdb/internal/buffers/pool_test.go @@ -5,6 +5,7 @@ import ( "github.com/sopherapps/go-scdb/scdb/internal" "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" "github.com/sopherapps/go-scdb/scdb/internal/entries/values" + "github.com/sopherapps/go-scdb/scdb/internal/inverted_index" "github.com/stretchr/testify/assert" "os" "testing" @@ -476,7 +477,7 @@ func TestBufferPool_CompactFile(t *testing.T) { initialFileSize := getActualFileSize(t, fileName) - searchIndex, err := internal.NewInvertedIndex(indexFileName, nil, nil, nil) + searchIndex, err := inverted_index.NewInvertedIndex(indexFileName, nil, nil, nil) if err != nil { t.Fatalf("error creating a search index: %s", err) } diff --git a/scdb/internal/inverted_index.go b/scdb/internal/inverted_index/inverted_index.go similarity index 72% rename from scdb/internal/inverted_index.go rename to scdb/internal/inverted_index/inverted_index.go index 28b499f..ac24e60 100644 --- a/scdb/internal/inverted_index.go +++ b/scdb/internal/inverted_index/inverted_index.go @@ -1,9 +1,17 @@ -package internal +package inverted_index -import "os" +import ( + "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" + "os" +) type InvertedIndex struct { - File *os.File + File *os.File + FilePath 
string + MaxIndexKeyLen uint32 + ValuesStartPoint uint64 + FileSize uint64 + header *headers.InvertedIndexHeader } // NewInvertedIndex initializes a new Inverted Index @@ -42,12 +50,22 @@ func (idx *InvertedIndex) Remove(key []byte) error { // Clear clears all the data in the search index, except the header, and its original // variables func (idx *InvertedIndex) Clear() error { + header := headers.NewInvertedIndexHeader(&idx.header.MaxKeys, &idx.header.RedundantBlocks, &idx.header.BlockSize, &idx.header.MaxIndexKeyLen) + fileSize, err := headers.InitializeFile(idx.File, header) + if err != nil { + return err + } + + idx.FileSize = uint64(fileSize) return nil } // Eq checks if the other InvertedIndex instance equals the current inverted index func (idx *InvertedIndex) Eq(other *InvertedIndex) bool { - return false + return idx.ValuesStartPoint == other.ValuesStartPoint && + idx.MaxIndexKeyLen == other.MaxIndexKeyLen && + idx.FilePath == other.FilePath && + idx.FileSize == other.FileSize } // Close closes the buffer pool, freeing up any resources diff --git a/scdb/internal/inverted_index/inverted_index_test.go b/scdb/internal/inverted_index/inverted_index_test.go new file mode 100644 index 0000000..5d12f0c --- /dev/null +++ b/scdb/internal/inverted_index/inverted_index_test.go @@ -0,0 +1,404 @@ +package inverted_index + +import ( + "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" + "github.com/stretchr/testify/assert" + "os" + "testing" + "time" +) + +type testSearchParams struct { + term []byte + skip uint64 + limit uint64 + expected []uint64 +} + +type testAddParams struct { + k []byte + addr uint64 + expiry uint64 +} + +func TestNewInvertedIndex(t *testing.T) { + fileName := "testdb.iscdb" + testMaxKeys := uint64(360) + testMaxIndexKeyLen := uint32(10) + testRedundantBlocks := uint16(4) + defer func() { + _ = os.Remove(fileName) + }() + + t.Run("NewInvertedIndexForNonExistingFile", func(t *testing.T) { + type expectedRecord struct { + maxIndexKeyLen uint32 + valuesStartPoint uint64 + filePath string + fileSize uint64 + } + type testRecord struct { + maxIndexKeyLen *uint32 + filePath string + maxKeys *uint64 + redundantBlocks *uint16 + expected expectedRecord + } + + testData := []testRecord{ + {nil, fileName, nil, nil, expectedRecord{ + maxIndexKeyLen: headers.DefaultMaxIndexKeyLen, + valuesStartPoint: headers.NewInvertedIndexHeader(nil, nil, nil, nil).ValuesStartPoint, + filePath: fileName, + fileSize: headers.NewInvertedIndexHeader(nil, nil, nil, nil).ValuesStartPoint, + }}, + {&testMaxIndexKeyLen, fileName, nil, nil, expectedRecord{ + maxIndexKeyLen: testMaxIndexKeyLen, + valuesStartPoint: headers.NewInvertedIndexHeader(nil, nil, nil, &testMaxIndexKeyLen).ValuesStartPoint, + filePath: fileName, + fileSize: headers.NewInvertedIndexHeader(nil, nil, nil, &testMaxIndexKeyLen).ValuesStartPoint, + }}, + {nil, fileName, &testMaxKeys, nil, expectedRecord{ + maxIndexKeyLen: headers.DefaultMaxIndexKeyLen, + valuesStartPoint: headers.NewInvertedIndexHeader(&testMaxKeys, nil, nil, nil).ValuesStartPoint, + filePath: fileName, + fileSize: headers.NewInvertedIndexHeader(&testMaxKeys, nil, nil, nil).ValuesStartPoint, + }}, + {nil, fileName, nil, &testRedundantBlocks, expectedRecord{ + maxIndexKeyLen: headers.DefaultMaxIndexKeyLen, + valuesStartPoint: headers.NewInvertedIndexHeader(nil, &testRedundantBlocks, nil, nil).ValuesStartPoint, + filePath: fileName, + fileSize: headers.NewInvertedIndexHeader(nil, &testRedundantBlocks, nil, nil).ValuesStartPoint, + }}, + } + + // delete the file so 
that NewInvertedIndex() can reinitialize it.
+		_ = os.Remove(fileName)
+
+		for _, record := range testData {
+			got, err := NewInvertedIndex(record.filePath, record.maxIndexKeyLen, record.maxKeys, record.redundantBlocks)
+			if err != nil {
+				t.Fatalf("error creating new inverted index: %s", err)
+			}
+
+			assert.Equal(t, record.expected.maxIndexKeyLen, got.MaxIndexKeyLen)
+			assert.Equal(t, record.expected.valuesStartPoint, got.ValuesStartPoint)
+			assert.Equal(t, record.expected.filePath, got.FilePath)
+			assert.Equal(t, record.expected.fileSize, got.FileSize)
+
+			err = os.Remove(got.FilePath)
+			if err != nil {
+				t.Fatalf("error removing inverted index file: %s", got.FilePath)
+			}
+		}
+	})
+
+	t.Run("NewInvertedIndexForExistingFile", func(t *testing.T) {
+		type testRecord struct {
+			filePath        string
+			maxIndexKeyLen  *uint32
+			maxKeys         *uint64
+			redundantBlocks *uint16
+		}
+
+		testData := []testRecord{
+			{fileName, nil, nil, nil},
+			{fileName, &testMaxIndexKeyLen, nil, nil},
+			{fileName, nil, &testMaxKeys, nil},
+			{fileName, nil, nil, &testRedundantBlocks},
+		}
+
+		for _, record := range testData {
+			first, err := NewInvertedIndex(record.filePath, record.maxIndexKeyLen, record.maxKeys, record.redundantBlocks)
+			if err != nil {
+				t.Fatalf("error creating new inverted index: %s", err)
+			}
+
+			second, err := NewInvertedIndex(record.filePath, record.maxIndexKeyLen, record.maxKeys, record.redundantBlocks)
+			if err != nil {
+				t.Fatalf("error creating new inverted index: %s", err)
+			}
+
+			assert.True(t, first.Eq(second))
+
+			err = os.Remove(first.FilePath)
+			if err != nil {
+				t.Fatalf("error removing inverted index file: %s", first.FilePath)
+			}
+		}
+	})
+}
+
+func TestInvertedIndex_Add(t *testing.T) {
+	fileName := "testdb.iscdb"
+	now := uint64(time.Now().Unix())
+
+	addParams := []testAddParams{
+		{[]byte("foo"), 20, 0},
+		{[]byte("food"), 60, now + 3600},
+		{[]byte("fore"), 160, 0},
+		{[]byte("bar"), 600, now - 3600}, // expired
+		{[]byte("bare"), 90, now + 7200},
+		{[]byte("barricade"), 900, 0},
+		{[]byte("pig"), 80, 0},
+	}
+
+	t.Run("AddAddsTheKeyAndAddrToInvertedIndex", func(t *testing.T) {
+		defer func() {
+			_ = os.Remove(fileName)
+		}()
+		table := []testSearchParams{
+			{[]byte("f"), 0, 0, []uint64{20, 60, 160}},
+			{[]byte("fo"), 0, 0, []uint64{20, 60, 160}},
+			{[]byte("foo"), 0, 0, []uint64{20, 60}},
+			{[]byte("for"), 0, 0, []uint64{160}},
+			{[]byte("food"), 0, 0, []uint64{60}},
+			{[]byte("fore"), 0, 0, []uint64{160}},
+			{[]byte("b"), 0, 0, []uint64{90, 900}},
+			{[]byte("ba"), 0, 0, []uint64{90, 900}},
+			{[]byte("bar"), 0, 0, []uint64{90, 900}},
+			{[]byte("bare"), 0, 0, []uint64{90}},
+			{[]byte("barr"), 0, 0, []uint64{900}},
+			{[]byte("p"), 0, 0, []uint64{80}},
+			{[]byte("pi"), 0, 0, []uint64{80}},
+			{[]byte("pig"), 0, 0, []uint64{80}},
+		}
+
+		searchIdx := createSearchIndex(t, fileName, addParams)
+		testSearchResults(t, searchIdx, table)
+	})
+
+	t.Run("AddUpdatesPreexistingKeyAndAddrToInvertedIndex", func(t *testing.T) {
+		defer func() {
+			_ = os.Remove(fileName)
+		}()
+
+		updates := []testAddParams{
+			{[]byte("foo"), 20, now - 30},    // expired
+			{[]byte("bare"), 90, now - 7200}, // expired
+			{[]byte("bar"), 500, now + 3600},
+		}
+		table := []testSearchParams{
+			{[]byte("f"), 0, 0, []uint64{60, 160}},
+			{[]byte("fo"), 0, 0, []uint64{60, 160}},
+			{[]byte("foo"), 0, 0, []uint64{60}},
+			{[]byte("for"), 0, 0, []uint64{160}},
+			{[]byte("food"), 0, 0, []uint64{60}},
+			{[]byte("fore"), 0, 0, []uint64{160}},
+			{[]byte("b"), 0, 0, []uint64{500, 900}},
+			{[]byte("ba"), 0, 0, []uint64{500, 900}},
+			
{[]byte("bar"), 0, 0, []uint64{500, 900}}, + {[]byte("bare"), 0, 0, []uint64{}}, + {[]byte("barr"), 0, 0, []uint64{900}}, + {[]byte("p"), 0, 0, []uint64{80}}, + {[]byte("pi"), 0, 0, []uint64{80}}, + {[]byte("pig"), 0, 0, []uint64{80}}, + } + + searchIdx := createSearchIndex(t, fileName, addParams) + for _, p := range updates { + err := searchIdx.Add(p.k, p.addr, p.expiry) + if err != nil { + t.Fatalf("error updating key address %s: %s", p.k, err) + } + } + + testSearchResults(t, searchIdx, table) + }) +} + +func TestInvertedIndex_Search(t *testing.T) { + fileName := "testdb.iscdb" + defer func() { + _ = os.Remove(fileName) + }() + + now := uint64(time.Now().Unix()) + + addParams := []testAddParams{ + {[]byte("foo"), 20, 0}, + {[]byte("food"), 60, now + 3600}, + {[]byte("fore"), 160, 0}, + {[]byte("bar"), 600, now - 3600}, // expired + {[]byte("bare"), 90, now + 7200}, + {[]byte("barricade"), 900, 0}, + {[]byte("pig"), 80, 0}, + } + + table := []testSearchParams{ + {[]byte("f"), 0, 0, []uint64{20, 60, 160}}, + {[]byte("f"), 1, 0, []uint64{60, 160}}, + {[]byte("f"), 2, 0, []uint64{160}}, + {[]byte("f"), 3, 0, []uint64{}}, + {[]byte("f"), 0, 3, []uint64{20, 60, 160}}, + {[]byte("f"), 0, 2, []uint64{20, 60}}, + {[]byte("f"), 1, 3, []uint64{60, 160}}, + {[]byte("f"), 1, 2, []uint64{60, 160}}, + {[]byte("f"), 2, 2, []uint64{160}}, + {[]byte("fo"), 0, 0, []uint64{20, 60, 160}}, + {[]byte("fo"), 1, 0, []uint64{60, 160}}, + {[]byte("fo"), 2, 0, []uint64{160}}, + {[]byte("fo"), 1, 1, []uint64{60}}, + {[]byte("bar"), 0, 0, []uint64{90, 900}}, + {[]byte("bar"), 1, 0, []uint64{900}}, + {[]byte("bar"), 1, 1, []uint64{900}}, + {[]byte("bar"), 1, 1, []uint64{900}}, + {[]byte("pi"), 0, 2, []uint64{80}}, + {[]byte("pi"), 1, 2, []uint64{}}, + {[]byte("pigg"), 1, 2, []uint64{}}, + {[]byte("ben"), 1, 2, []uint64{}}, + } + + searchIdx := createSearchIndex(t, fileName, addParams) + testSearchResults(t, searchIdx, table) +} + +func TestInvertedIndex_Remove(t *testing.T) { + fileName := "testdb.iscdb" + defer func() { + _ = os.Remove(fileName) + }() + + now := uint64(time.Now().Unix()) + addParams := []testAddParams{ + {[]byte("foo"), 20, 0}, + {[]byte("food"), 60, now + 3600}, + {[]byte("fore"), 160, 0}, + {[]byte("bar"), 600, now - 3500}, // expired + {[]byte("bare"), 90, now + 7200}, + {[]byte("barricade"), 900, 0}, + {[]byte("pig"), 80, 0}, + } + + keysToRemove := [][]byte{[]byte("foo"), []byte("pig")} + table := []testSearchParams{ + {[]byte("f"), 0, 0, []uint64{60, 160}}, + {[]byte("fo"), 0, 0, []uint64{60, 160}}, + {[]byte("foo"), 0, 0, []uint64{60}}, + {[]byte("for"), 0, 0, []uint64{160}}, + {[]byte("food"), 0, 0, []uint64{60}}, + {[]byte("fore"), 0, 0, []uint64{160}}, + {[]byte("b"), 0, 0, []uint64{90, 900}}, + {[]byte("ba"), 0, 0, []uint64{90, 900}}, + {[]byte("bar"), 0, 0, []uint64{90, 900}}, + {[]byte("bare"), 0, 0, []uint64{90}}, + {[]byte("barr"), 0, 0, []uint64{900}}, + {[]byte("p"), 0, 0, []uint64{}}, + {[]byte("pi"), 0, 0, []uint64{}}, + {[]byte("pig"), 0, 0, []uint64{}}, + } + + searchIdx := createSearchIndex(t, fileName, addParams) + removeManyKeys(t, searchIdx, keysToRemove) + testSearchResults(t, searchIdx, table) +} + +func TestInvertedIndex_Clear(t *testing.T) { + fileName := "testdb.iscdb" + defer func() { + _ = os.Remove(fileName) + }() + + now := uint64(time.Now().Unix()) + addParams := []testAddParams{ + {[]byte("foo"), 20, 0}, + {[]byte("food"), 60, now + 3600}, + {[]byte("fore"), 160, 0}, + {[]byte("bar"), 600, now - 3600}, // expired + {[]byte("bare"), 90, now + 7200}, + 
{[]byte("barricade"), 900, 0}, + {[]byte("pig"), 80, 0}, + } + + table := []testSearchParams{ + {[]byte("f"), 0, 0, []uint64{}}, + {[]byte("fo"), 0, 0, []uint64{}}, + {[]byte("foo"), 0, 0, []uint64{}}, + {[]byte("for"), 0, 0, []uint64{}}, + {[]byte("food"), 0, 0, []uint64{}}, + {[]byte("fore"), 0, 0, []uint64{}}, + {[]byte("b"), 0, 0, []uint64{}}, + {[]byte("ba"), 0, 0, []uint64{}}, + {[]byte("bar"), 0, 0, []uint64{}}, + {[]byte("bare"), 0, 0, []uint64{}}, + {[]byte("barr"), 0, 0, []uint64{}}, + {[]byte("p"), 0, 0, []uint64{}}, + {[]byte("pi"), 0, 0, []uint64{}}, + {[]byte("pig"), 0, 0, []uint64{}}, + } + + searchIdx := createSearchIndex(t, fileName, addParams) + err := searchIdx.Clear() + if err != nil { + t.Fatalf("error clearing inverted index: %s", err) + } + + testSearchResults(t, searchIdx, table) +} + +func TestInvertedIndex_Close(t *testing.T) { + fileName := "testdb.iscdb" + defer func() { + _ = os.Remove(fileName) + }() + + idx, err := NewInvertedIndex(fileName, nil, nil, nil) + if err != nil { + t.Fatalf("error creating inverted index: %s", err) + } + + err = idx.Close() + if err != nil { + t.Fatalf("error closing inverted index: %s", err) + } + + // Close has already been called on File + assert.NotNil(t, idx.File.Close()) +} + +// removeManyKeys removes many keys from the inverted index +func removeManyKeys(t *testing.T, idx *InvertedIndex, keys [][]byte) { + for _, key := range keys { + err := idx.Remove(key) + if err != nil { + t.Fatalf("error removing key '%s': %s", key, err) + } + } +} + +// testSearchResults tests whether when the inverted index is searched, it returns +// the expected data in `params` +func testSearchResults(t *testing.T, idx *InvertedIndex, params []testSearchParams) { + for _, p := range params { + got, err := idx.Search(p.term, p.skip, p.limit) + if err != nil { + t.Fatalf("error searching for '%s': %s", p.term, err) + } + + assert.Equal(t, p.expected, got) + } +} + +// createSearchIndex creates an inverted index for test purposes, and adds a number of +// test records as passed by the `params` +func createSearchIndex(t *testing.T, filePath string, params []testAddParams) *InvertedIndex { + idx, err := NewInvertedIndex(filePath, nil, nil, nil) + if err != nil { + t.Fatalf("error creating inverted index: %s", err) + } + + err = idx.Clear() + if err != nil { + t.Fatalf("error clearing inverted index: %s", err) + } + + // Add a series of keys and addresses + for _, p := range params { + err = idx.Add(p.k, p.addr, p.expiry) + if err != nil { + t.Fatalf("error adding key address %s: %s", p.k, err) + } + } + + return idx +} diff --git a/scdb/internal/inverted_index_test.go b/scdb/internal/inverted_index_test.go deleted file mode 100644 index 5bf0569..0000000 --- a/scdb/internal/inverted_index_test.go +++ /dev/null @@ -1 +0,0 @@ -package internal diff --git a/scdb/store.go b/scdb/store.go index 2588def..5224e8e 100644 --- a/scdb/store.go +++ b/scdb/store.go @@ -7,6 +7,7 @@ import ( "github.com/sopherapps/go-scdb/scdb/internal/buffers" "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" "github.com/sopherapps/go-scdb/scdb/internal/entries/values" + "github.com/sopherapps/go-scdb/scdb/internal/inverted_index" "os" "path/filepath" "sync" @@ -29,7 +30,7 @@ var zeroU64 = internal.Uint64ToByteArray(0) type Store struct { bufferPool *buffers.BufferPool header *headers.DbFileHeader - searchIndex *internal.InvertedIndex + searchIndex *inverted_index.InvertedIndex closeCh chan bool mu sync.Mutex isClosed bool @@ -88,7 +89,7 @@ func New(path string, maxKeys 
*uint64, redundantBlocks *uint16, poolCapacity *ui } searchIndexFilePath := filepath.Join(path, defaultSearchIndexFile) - searchIndex, err := internal.NewInvertedIndex(searchIndexFilePath, maxIndexKeyLen, maxKeys, redundantBlocks) + searchIndex, err := inverted_index.NewInvertedIndex(searchIndexFilePath, maxIndexKeyLen, maxKeys, redundantBlocks) if err != nil { return nil, err } From cf2f5b86471f48e8d73930b55ebeb38fd67000e8 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 15:09:55 +0300 Subject: [PATCH 06/15] WIP: Add NewInvertedIndex function --- scdb/internal/buffers/pool.go | 23 +-------- .../internal/inverted_index/inverted_index.go | 48 ++++++++++++++++++- scdb/internal/utils.go | 19 ++++++++ 3 files changed, 68 insertions(+), 22 deletions(-) diff --git a/scdb/internal/buffers/pool.go b/scdb/internal/buffers/pool.go index dfbb1cd..37d6089 100644 --- a/scdb/internal/buffers/pool.go +++ b/scdb/internal/buffers/pool.go @@ -65,7 +65,7 @@ func NewBufferPool(capacity *uint64, filePath string, maxKeys *uint64, redundant poolCap = DefaultPoolCapacity } - dbFileExists, err := pathExists(filePath) + dbFileExists, err := internal.PathExists(filePath) if err != nil { return nil, err } @@ -94,7 +94,7 @@ func NewBufferPool(capacity *uint64, filePath string, maxKeys *uint64, redundant } } - fileSize, err := getFileSize(file) + fileSize, err := internal.GetFileSize(file) if err != nil { return nil, err } @@ -591,25 +591,6 @@ func (bp *BufferPool) Eq(other *BufferPool) bool { return true } -// pathExists checks to see if a given path exists -func pathExists(path string) (bool, error) { - _, err := os.Stat(path) - if os.IsNotExist(err) { - return false, nil - } - - return err == nil, err -} - -// getFileSize computes the file size of the given file -func getFileSize(file *os.File) (uint64, error) { - fileStat, err := file.Stat() - if err != nil { - return 0, err - } - return uint64(fileStat.Size()), nil -} - // getIndexCapacity computes the capacity (i.e. number of buffers) of the buffers to be set aside for index buffers // It can't be less than 1 and it can't be more than the number of index blocks available func getIndexCapacity(numOfIndexBlocks uint64, totalCapacity uint64) uint64 { diff --git a/scdb/internal/inverted_index/inverted_index.go b/scdb/internal/inverted_index/inverted_index.go index ac24e60..60bf5fa 100644 --- a/scdb/internal/inverted_index/inverted_index.go +++ b/scdb/internal/inverted_index/inverted_index.go @@ -1,6 +1,7 @@ package inverted_index import ( + "github.com/sopherapps/go-scdb/scdb/internal" "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" "os" ) @@ -20,7 +21,52 @@ type InvertedIndex struct { // Since we each db key will be represented in the index a number of `max_index_key_len` times // for example the key `food` must have the following index keys: `f`, `fo`, `foo`, `food`. 
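+// A rough sketch (illustrative only; `prefixesOf` is a hypothetical helper,
+// not part of this patch) of how those index keys are derived for a db key:
+//
+//	func prefixesOf(key []byte, maxIndexKeyLen uint32) [][]byte {
+//		n := uint32(len(key))
+//		if n > maxIndexKeyLen {
+//			n = maxIndexKeyLen
+//		}
+//		prefixes := make([][]byte, 0, n)
+//		for i := uint32(1); i <= n; i++ {
+//			prefixes = append(prefixes, key[:i])
+//		}
+//		return prefixes
+//	}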
func NewInvertedIndex(filePath string, maxIndexKeyLen *uint32, dbMaxKeys *uint64, dbRedundantBlocks *uint16) (*InvertedIndex, error) { - return nil, nil + blockSize := uint32(os.Getpagesize()) + + dbFileExists, err := internal.PathExists(filePath) + if err != nil { + return nil, err + } + + fileOpenFlag := os.O_RDWR + if !dbFileExists { + fileOpenFlag = fileOpenFlag | os.O_CREATE + } + + file, err := os.OpenFile(filePath, fileOpenFlag, 0666) + if err != nil { + return nil, err + } + + var header *headers.InvertedIndexHeader + if !dbFileExists { + header = headers.NewInvertedIndexHeader(dbMaxKeys, dbRedundantBlocks, &blockSize, maxIndexKeyLen) + _, err = headers.InitializeFile(file, header) + if err != nil { + return nil, err + } + } else { + header, err = headers.ExtractInvertedIndexHeaderFromFile(file) + if err != nil { + return nil, err + } + } + + fileSize, err := internal.GetFileSize(file) + if err != nil { + return nil, err + } + + idx := InvertedIndex{ + File: file, + FilePath: filePath, + MaxIndexKeyLen: header.MaxIndexKeyLen, + ValuesStartPoint: header.ValuesStartPoint, + FileSize: fileSize, + header: header, + } + + return &idx, nil } // Add adds a key's kv address in the corresponding prefixes' lists to update the inverted index diff --git a/scdb/internal/utils.go b/scdb/internal/utils.go index 6c205ef..7951443 100644 --- a/scdb/internal/utils.go +++ b/scdb/internal/utils.go @@ -134,3 +134,22 @@ func ValidateBounds(actualLower uint64, actualUpper uint64, expectedLower uint64 } return nil } + +// PathExists checks to see if a given path exists +func PathExists(path string) (bool, error) { + _, err := os.Stat(path) + if os.IsNotExist(err) { + return false, nil + } + + return err == nil, err +} + +// GetFileSize computes the file size of the given file +func GetFileSize(file *os.File) (uint64, error) { + fileStat, err := file.Stat() + if err != nil { + return 0, err + } + return uint64(fileStat.Size()), nil +} From 3234dc2ed738ba46709bbaacc6cb8dc45d022243 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 18:31:54 +0300 Subject: [PATCH 07/15] WIP: Add `Add`, `Clear` and `Search` ops on InvertedIndex --- .../entries/values/inverted_index_entry.go | 21 +- .../internal/inverted_index/inverted_index.go | 322 +++++++++++++++++- 2 files changed, 341 insertions(+), 2 deletions(-) diff --git a/scdb/internal/entries/values/inverted_index_entry.go b/scdb/internal/entries/values/inverted_index_entry.go index 9ea1f15..3f6111d 100644 --- a/scdb/internal/entries/values/inverted_index_entry.go +++ b/scdb/internal/entries/values/inverted_index_entry.go @@ -1,6 +1,9 @@ package values -import "github.com/sopherapps/go-scdb/scdb/internal" +import ( + "github.com/sopherapps/go-scdb/scdb/internal" + "os" +) const InvertedIndexEntryMinSizeInBytes uint32 = 4 + 4 + 1 + 1 + 8 + 8 + 8 + 8 @@ -158,3 +161,19 @@ func (ide *InvertedIndexEntry) AsBytes() []byte { internal.Uint64ToByteArray(ide.KvAddress), ) } + +// UpdateNextOffsetOnFile updates the next offset of a given entry on the given file at the given address +func (ide *InvertedIndexEntry) UpdateNextOffsetOnFile(file *os.File, entryAddr uint64, newNextOffset uint64) error { + kSize := uint64(ide.Size - ide.IndexKeySize - InvertedIndexEntryMinSizeInBytes) + offset := entryAddr + kSize + uint64(ide.IndexKeySize) + 18 + _, err := file.WriteAt(internal.Uint64ToByteArray(newNextOffset), int64(offset)) + return err +} + +// UpdatePreviousOffsetOnFile updates the previous offset of a given entry on the given file at the given address +func (ide 
*InvertedIndexEntry) UpdatePreviousOffsetOnFile(file *os.File, entryAddr uint64, newPreviousOffset uint64) error { + kSize := uint64(ide.Size - ide.IndexKeySize - InvertedIndexEntryMinSizeInBytes) + offset := entryAddr + kSize + uint64(ide.IndexKeySize) + 26 + _, err := file.WriteAt(internal.Uint64ToByteArray(newPreviousOffset), int64(offset)) + return err +} diff --git a/scdb/internal/inverted_index/inverted_index.go b/scdb/internal/inverted_index/inverted_index.go index 60bf5fa..4a252ad 100644 --- a/scdb/internal/inverted_index/inverted_index.go +++ b/scdb/internal/inverted_index/inverted_index.go @@ -1,11 +1,19 @@ package inverted_index import ( + "bytes" + "errors" + scdbErrs "github.com/sopherapps/go-scdb/scdb/errors" "github.com/sopherapps/go-scdb/scdb/internal" "github.com/sopherapps/go-scdb/scdb/internal/entries/headers" + "github.com/sopherapps/go-scdb/scdb/internal/entries/values" + "io" + "math" "os" ) +var zeroU64Bytes = make([]byte, headers.IndexEntrySizeInBytes) + type InvertedIndex struct { File *os.File FilePath string @@ -71,6 +79,55 @@ func NewInvertedIndex(filePath string, maxIndexKeyLen *uint32, dbMaxKeys *uint64 // Add adds a key's kv address in the corresponding prefixes' lists to update the inverted index func (idx *InvertedIndex) Add(key []byte, kvAddr uint64, expiry uint64) error { + upperBound := uint32(math.Min(float64(len(key)), float64(idx.MaxIndexKeyLen))) + 1 + + for i := uint32(1); i < upperBound; i++ { + prefix := key[:i] + + indexBlock := uint64(0) + indexOffset := headers.GetIndexOffset(idx.header, prefix) + + for { + indexOffset, err := headers.GetIndexOffsetInNthBlock(idx.header, indexOffset, indexBlock) + if err != nil { + return err + } + + addr, err := idx.readEntryAddress(indexOffset) + if err != nil { + return err + } + + if bytes.Equal(addr, zeroU64Bytes) { + err = idx.appendNewRootEntry(prefix, indexOffset, key, kvAddr, expiry) + if err != nil { + return err + } + + break + } + + isForPrefix, err := idx.addrBelongsToPrefix(addr, prefix) + if err != nil { + return err + } + + if isForPrefix { + err = idx.upsertEntry(prefix, addr, key, kvAddr, expiry) + if err != nil { + return err + } + + break + } + + indexBlock += 1 + if indexBlock >= idx.header.NumberOfIndexBlocks { + return scdbErrs.NewErrCollisionSaturation(prefix) + } + } + } + return nil } @@ -85,7 +142,37 @@ func (idx *InvertedIndex) Add(key []byte, kvAddr uint64, expiry uint64) error { // If `limit` is 0, all items are returned since it would make no sense for someone to search // for zero items. 
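+// For illustration (the addresses are hypothetical, mirroring the tests):
+// if the prefix `fo` currently maps to the kv addresses [20, 60, 160], then
+//
+//	idx.Search([]byte("fo"), 0, 0) // => [20, 60, 160] i.e. no skip, no limit
+//	idx.Search([]byte("fo"), 1, 0) // => [60, 160] i.e. skip the first match
+//	idx.Search([]byte("fo"), 1, 1) // => [60] i.e. skip one, return at most one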
func (idx *InvertedIndex) Search(term []byte, skip uint64, limit uint64) ([]uint64, error) { - return nil, nil + prefixLen := uint32(math.Min(float64(len(term)), float64(idx.MaxIndexKeyLen))) + prefix := term[:prefixLen] + + indexOffset := headers.GetIndexOffset(idx.header, prefix) + + for indexBlock := uint64(0); indexBlock < idx.header.NumberOfIndexBlocks; indexBlock++ { + indexOffset, err := headers.GetIndexOffsetInNthBlock(idx.header, indexOffset, indexBlock) + if err != nil { + return nil, err + } + + addr, err := idx.readEntryAddress(indexOffset) + if err != nil { + return nil, err + } + + if bytes.Equal(addr, zeroU64Bytes) { + return []uint64{}, nil + } + + isForPrefix, err := idx.addrBelongsToPrefix(addr, prefix) + if err != nil { + return nil, err + } + + if isForPrefix { + return idx.getMatchedKvAddrsForPrefix(term, addr, skip, limit) + } + } + + return []uint64{}, nil } // Remove deletes the key's kv address from all prefixes' lists in the inverted index @@ -118,3 +205,236 @@ func (idx *InvertedIndex) Eq(other *InvertedIndex) bool { func (idx *InvertedIndex) Close() error { return idx.File.Close() } + +// getMatchedKvAddrsForPrefix returns the kv_addresses of all items whose db key contain the given `term` +func (idx *InvertedIndex) getMatchedKvAddrsForPrefix(term []byte, prefixRootAddr []byte, skip uint64, limit uint64) ([]uint64, error) { + matchedAddrs := make([]uint64, 0) + skipped := uint64(0) + shouldSlice := limit > 0 + + rootAddr, err := internal.Uint64FromByteArray(prefixRootAddr) + if err != nil { + return nil, err + } + + addr := rootAddr + for { + entryBytes, err := readEntryBytes(idx.File, addr) + if err != nil { + return nil, err + } + + entry, err := values.ExtractInvertedIndexEntryFromByteArray(entryBytes, 0) + if err != nil { + return nil, err + } + + if !values.IsExpired(entry) && bytes.Contains(entry.Key, term) { + if skipped < skip { + skipped++ + } else { + matchedAddrs = append(matchedAddrs, entry.KvAddress) + } + + if shouldSlice && uint64(len(matchedAddrs)) >= limit { + break + } + } + + addr = entry.NextOffset + // The zero check is for data corruption + if addr == rootAddr || addr == 0 { + break + } + } + + return matchedAddrs, nil +} + +// readEntryAddress reads the index at the given address and returns it +func (idx *InvertedIndex) readEntryAddress(addr uint64) ([]byte, error) { + err := internal.ValidateBounds(addr, addr+headers.IndexEntrySizeInBytes, headers.HeaderSizeInBytes, idx.ValuesStartPoint, "entry address out of bound") + if err != nil { + return nil, err + } + + buf := make([]byte, headers.IndexEntrySizeInBytes) + bytesRead, err := idx.File.ReadAt(buf, int64(addr)) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + + return buf[:bytesRead], nil +} + +// appendNewRootEntry appends a new root entry to the index file, and updates the inverted index's index +func (idx *InvertedIndex) appendNewRootEntry(prefix []byte, indexOffset uint64, key []byte, kvAddr uint64, expiry uint64) error { + newAddr := idx.FileSize + + entry := values.NewInvertedIndexEntry(prefix, key, expiry, true, kvAddr, newAddr, newAddr) + entryAsBytes := entry.AsBytes() + _, err := idx.File.WriteAt(entryAsBytes, int64(newAddr)) + if err != nil { + return err + } + + // update index + _, err = idx.File.WriteAt(internal.Uint64ToByteArray(newAddr), int64(indexOffset)) + if err != nil { + return err + } + + // update file size + idx.FileSize = newAddr + uint64(len(entryAsBytes)) + return nil +} + +// addrBelongsToPrefix checks to see if entry address belongs to 
the given `prefix` (i.e. index key) +// +// It returns false if the address is out of bounds +// or when the index key there is not equal to `prefix`. +func (idx *InvertedIndex) addrBelongsToPrefix(addr []byte, prefix []byte) (bool, error) { + address, err := internal.Uint64FromByteArray(addr) + if err != nil { + return false, err + } + + if address >= idx.FileSize { + return false, nil + } + + prefixLen := uint32(len(prefix)) + indexKeySizeBuf := make([]byte, 4) + bytesRead, err := idx.File.ReadAt(indexKeySizeBuf, int64(address+4)) + if err != nil && !errors.Is(err, io.EOF) { + return false, err + } + + indexKeySize, err := internal.Uint32FromByteArray(indexKeySizeBuf[:bytesRead]) + if err != nil { + return false, err + } + + if prefixLen != indexKeySize { + return false, nil + } + + indexKeyBuf := make([]byte, prefixLen) + bytesRead, err = idx.File.ReadAt(indexKeyBuf, int64(address+8)) + if err != nil && !errors.Is(err, io.EOF) { + return false, err + } + + return bytes.Equal(indexKeyBuf[:bytesRead], prefix), nil +} + +// upsertEntry updates an existing entry whose prefix (or index key) is given and key is also as given. +// +// It starts at the root of the doubly-linked cyclic list for the given prefix, +// looks for the given key. If it finds it, it updates it. If it does not find it, it appends +// the new entry to the end of that list. +func (idx *InvertedIndex) upsertEntry(prefix []byte, rootAddr []byte, key []byte, kvAddr uint64, expiry uint64) error { + rootAddrU64, err := internal.Uint64FromByteArray(rootAddr) + if err != nil { + return err + } + + addr := rootAddrU64 + + for { + entryBytes, err := readEntryBytes(idx.File, addr) + if err != nil { + return err + } + + entry, err := values.ExtractInvertedIndexEntryFromByteArray(entryBytes, 0) + if err != nil { + return err + } + + if bytes.Equal(entry.Key, key) { + entry.KvAddress = kvAddr + entry.Expiry = expiry + _, err := writeEntryToFile(idx.File, addr, entry) + if err != nil { + return err + } + break + } else if entry.NextOffset == rootAddrU64 { + // end of the list, append new item to the list + newEntry := values.NewInvertedIndexEntry(prefix, key, expiry, false, kvAddr, rootAddrU64, addr) + newEntryLen, err := writeEntryToFile(idx.File, idx.FileSize, newEntry) + if err != nil { + return err + } + + // update the next offset of the current entry to this address + err = entry.UpdateNextOffsetOnFile(idx.File, addr, idx.FileSize) + if err != nil { + return err + } + + // update the root entry to have its previous offset point to the newly added entry + rootEntryBytes, err := readEntryBytes(idx.File, rootAddrU64) + if err != nil { + return err + } + rootEntry, err := values.ExtractInvertedIndexEntryFromByteArray(rootEntryBytes, 0) + if err != nil { + return err + } + err = rootEntry.UpdatePreviousOffsetOnFile(idx.File, rootAddrU64, idx.FileSize) + if err != nil { + return err + } + + // increment file size by the new entry's size + idx.FileSize += uint64(newEntryLen) + break + } + + addr = entry.NextOffset + if addr == rootAddrU64 || addr == 0 { + // try to avoid looping forever in case of data corruption or something + // The zero check is for data corruption + break + } + } + + return nil +} + +// writeEntryToFile writes a given entry to the file at the given address, returning the number of bytes written +func writeEntryToFile(file *os.File, addr uint64, entry *values.InvertedIndexEntry) (int, error) { + entryAsBytes := entry.AsBytes() + bytesWritten, err := file.WriteAt(entryAsBytes, int64(addr)) + if err != nil { + return 
0, err
+	}
+
+	return bytesWritten, nil
+}
+
+// readEntryBytes reads a byte array for an entry at the given address in a file.
+// It returns only the bytes it could read, which may be fewer than the entry's
+// full size if the data ends prematurely.
+func readEntryBytes(file *os.File, addr uint64) ([]byte, error) {
+	address := int64(addr)
+	sizeBuf := make([]byte, 4)
+	bytesRead, err := file.ReadAt(sizeBuf, address)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return nil, err
+	}
+
+	size, err := internal.Uint32FromByteArray(sizeBuf[:bytesRead])
+	if err != nil {
+		return nil, err
+	}
+
+	buf := make([]byte, size)
+	bytesRead, err = file.ReadAt(buf, address)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return nil, err
+	}
+
+	return buf[:bytesRead], nil
+}
From 64830de6fe0b025a801f410924ba676a494955b4 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sat, 14 Jan 2023 19:23:09 +0300
Subject: [PATCH 08/15] WIP: Add `Remove` Op to InvertedIndex

---
 .../internal/inverted_index/inverted_index.go | 157 ++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/scdb/internal/inverted_index/inverted_index.go b/scdb/internal/inverted_index/inverted_index.go
index 4a252ad..f4bb2ca 100644
--- a/scdb/internal/inverted_index/inverted_index.go
+++ b/scdb/internal/inverted_index/inverted_index.go
@@ -177,6 +177,52 @@ func (idx *InvertedIndex) Search(term []byte, skip uint64, limit uint64) ([]uint
 
 // Remove deletes the key's kv address from all prefixes' lists in the inverted index
 func (idx *InvertedIndex) Remove(key []byte) error {
+	upperBound := uint32(math.Min(float64(len(key)), float64(idx.MaxIndexKeyLen))) + 1
+
+	for i := uint32(1); i < upperBound; i++ {
+		prefix := key[:i]
+
+		indexBlock := uint64(0)
+		indexOffset := headers.GetIndexOffset(idx.header, prefix)
+
+		for {
+			indexOffset, err := headers.GetIndexOffsetInNthBlock(idx.header, indexOffset, indexBlock)
+			if err != nil {
+				return err
+			}
+
+			addr, err := idx.readEntryAddress(indexOffset)
+			if err != nil {
+				return err
+			}
+
+			if bytes.Equal(addr, zeroU64Bytes) {
+				// prefix does not exist
+				break
+			}
+
+			isForPrefix, err := idx.addrBelongsToPrefix(addr, prefix)
+			if err != nil {
+				return err
+			}
+
+			if isForPrefix {
+				err = idx.removeKeyForPrefix(indexOffset, addr, key)
+				if err != nil {
+					return err
+				}
+
+				break
+			}
+
+			indexBlock += 1
+			if indexBlock >= idx.header.NumberOfIndexBlocks {
+				// prefix not found
+				break
+			}
+		}
+	}
+
 	return nil
 }
 
@@ -404,6 +450,117 @@ func (idx *InvertedIndex) upsertEntry(prefix []byte, rootAddr []byte, key []byte
 	return nil
 }
 
+// removeKeyForPrefix removes the given key from the cyclic linked list for the given `rootAddr`
+func (idx *InvertedIndex) removeKeyForPrefix(indexAddr uint64, rootAddr []byte, key []byte) error {
+	rootAddrU64, err := internal.Uint64FromByteArray(rootAddr)
+	if err != nil {
+		return err
+	}
+
+	addr := rootAddrU64
+
+	for {
+		entryBytes, err := readEntryBytes(idx.File, addr)
+		if err != nil {
+			return err
+		}
+
+		entry, err := values.ExtractInvertedIndexEntryFromByteArray(entryBytes, 0)
+		if err != nil {
+			return err
+		}
+
+		if bytes.Equal(entry.Key, key) {
+			previousAddr := entry.PreviousOffset
+			nextAddr := entry.NextOffset
+
+			// Deal with the next item
+			if nextAddr != addr {
+				nextEntryBytes, err := readEntryBytes(idx.File, nextAddr)
+				if err != nil {
+					return err
+				}
+
+				nextEntry, err := values.ExtractInvertedIndexEntryFromByteArray(nextEntryBytes, 0)
+				if err != nil {
+					return err
+				}
+
+				nextEntry.PreviousOffset = entry.PreviousOffset
+
+				// if next addr is the same as the previous addr, treat nextEntry as previous also
+				if nextAddr == 
previousAddr { + nextEntry.NextOffset = entry.NextOffset + } + + // make next entry a root entry since the one being removed is a root entry + if entry.IsRoot { + nextEntry.IsRoot = true + // update the root address so that it does not loop forever + rootAddrU64 = nextAddr + } + + _, err = writeEntryToFile(idx.File, nextAddr, nextEntry) + if err != nil { + return err + } + } + + // Deal with previous item + if previousAddr != addr && previousAddr != nextAddr { + prevEntryBytes, err := readEntryBytes(idx.File, previousAddr) + if err != nil { + return err + } + + prevEntry, err := values.ExtractInvertedIndexEntryFromByteArray(prevEntryBytes, 0) + if err != nil { + return err + } + + prevEntry.NextOffset = entry.NextOffset + _, err = writeEntryToFile(idx.File, previousAddr, prevEntry) + if err != nil { + return err + } + } + + // Deal with current item + entry.IsDeleted = true + _, err = writeEntryToFile(idx.File, addr, entry) + if err != nil { + return err + } + + // Update index + // if the entry to delete is at the root, and is the only element, reset the index + if addr == rootAddrU64 && nextAddr == addr { + _, err = idx.File.WriteAt(zeroU64Bytes, int64(indexAddr)) + if err != nil { + return err + } + } else if entry.IsRoot { + // the entry being removed is a root entry but there are other elements after it + // Update the index to contain the address of the next entry + _, err = idx.File.WriteAt(internal.Uint64ToByteArray(nextAddr), int64(indexAddr)) + if err != nil { + return err + } + } + + addr = entry.NextOffset + // if we have cycled back to the root entry, exit + // The zero check is for data corruption + if addr == rootAddrU64 || addr == 0 { + break + } + } + } + + return nil + +} + // writeEntryToFile writes a given entry to the file at the given address, returning the number of bytes written func writeEntryToFile(file *os.File, addr uint64, entry *values.InvertedIndexEntry) (int, error) { entryAsBytes := entry.AsBytes() From e8f7d9ad2057575f23f10dc2405f54005cdce352 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 19:30:58 +0300 Subject: [PATCH 09/15] Fix store.Search tests --- scdb/store.go | 5 +++++ scdb/store_test.go | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scdb/store.go b/scdb/store.go index 5224e8e..18eb11e 100644 --- a/scdb/store.go +++ b/scdb/store.go @@ -71,6 +71,11 @@ type Store struct { // A new key-value pair is created and the old one is left unindexed. // Compaction is important because it reclaims this space and reduces the size // of the database file. +// +// - `maxIndexKeyLen` - default 3: +// The maximum number of characters in each key in the search inverted index +// The inverted index is used for full-text search of keys to get all key-values +// whose keys start with a given byte array. 
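+//
+// A minimal sketch of a call site (the values are only examples; any of the
+// option pointers may be nil to fall back to the defaults):
+//
+//	var maxIndexKeyLen uint32 = 3
+//	store, err := New("db", nil, nil, nil, nil, &maxIndexKeyLen)
+//	if err != nil {
+//		// handle the error
+//	}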
func New(path string, maxKeys *uint64, redundantBlocks *uint16, poolCapacity *uint64, compactionInterval *uint32, maxIndexKeyLen *uint32) (*Store, error) { err := os.MkdirAll(path, 0755) if err != nil { diff --git a/scdb/store_test.go b/scdb/store_test.go index d89e9d2..77f7b4a 100644 --- a/scdb/store_test.go +++ b/scdb/store_test.go @@ -166,7 +166,7 @@ func TestStore_Search(t *testing.T) { {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bare"), 0, 0, []buffers.KeyValuePair{}}, } - recordsToExpire := []testRecord{RECORDS[0], RECORDS[2], RECORDS[3]} + recordsToExpire := []testRecord{SEARCH_RECORDS[0], SEARCH_RECORDS[2], SEARCH_RECORDS[3]} ttl := uint64(1) insertRecords(t, store, SEARCH_RECORDS, nil) insertRecords(t, store, recordsToExpire, &ttl) From ff57501d9729713887ef5373c34def8108ef6332 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 19:56:45 +0300 Subject: [PATCH 10/15] Remove goroutines from store as they are slow --- scdb/store.go | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/scdb/store.go b/scdb/store.go index 18eb11e..f97b01d 100644 --- a/scdb/store.go +++ b/scdb/store.go @@ -244,12 +244,6 @@ func (s *Store) Delete(k []byte) error { s.mu.Lock() defer s.mu.Unlock() - done := make(chan error) - // Update the search index in a separate goroutine. - go func(ch chan<- error) { - ch <- s.searchIndex.Remove(k) - }(done) - initialIdxOffset := headers.GetIndexOffset(s.header, k) for idxBlock := uint64(0); idxBlock < s.header.NumberOfIndexBlocks; idxBlock++ { @@ -284,8 +278,7 @@ func (s *Store) Delete(k []byte) error { } // if it is not found, no error is thrown - err := <-done // wait for search index to complete - return err + return s.searchIndex.Remove(k) } // Clear removes all data in the store @@ -293,19 +286,12 @@ func (s *Store) Clear() error { s.mu.Lock() defer s.mu.Unlock() - done := make(chan error) - // Update the search index in a separate goroutine. 
- go func(ch chan<- error) { - ch <- s.searchIndex.Clear() - }(done) - err := s.bufferPool.ClearFile() if err != nil { return err } - err = <-done // wait for search index goroutine to end - return err + return s.searchIndex.Clear() } // Compact manually removes dangling key-value pairs in the database file From 7063cfcee0dee2de3e7696a77efc5d7aa5b45614 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 19:58:16 +0300 Subject: [PATCH 11/15] Add benchmarks to README.md --- README.md | 136 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index c1186a1..e997fd5 100644 --- a/README.md +++ b/README.md @@ -81,13 +81,15 @@ func main() { var redundantBlocks uint16 = 1 var poolCapacity uint64 = 10 var compactionInterval uint32 = 1_800 + var maxIndexKeyLen uint32 = 3 store, err := scdb.New( "db", &maxKeys, &redundantBlocks, &poolCapacity, - &compactionInterval) + &compactionInterval, + &maxIndexKeyLen) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -137,6 +139,20 @@ func main() { fmt.Printf("Key: %s, Value: %s", k, value) } + // searching: without pagination + kvs, err := store.Search([]byte("h"), 0, 0) + if err != nil { + log.Fatalf("error searching 'h': %s", err) + } + fmt.Printf("\nno pagination: %v", kvs) + + // searching with pagination: get last two + kvs, err = store.Search([]byte("h"), 2, 2) + if err != nil { + log.Fatalf("error searching (paginated) 'h': %s", err) + } + fmt.Printf("\nskip 2, limit 2: %v", kvs) + // deleting for k := range records { err := store.Delete([]byte(k)) @@ -177,6 +193,8 @@ folder of the [rust scdb](https://github.com/sopherapps/scdb) to get up to speed - [database file format](https://github.com/sopherapps/scdb/tree/master/docs/DB_FILE_FORMAT.md) - [how it works](https://github.com/sopherapps/scdb/tree/master/docs/HOW_IT_WORKS.md) +- [inverted index file format](https://github.com/sopherapps/scdb/tree/master/docs/INVERTED_INDEX_FILE_FORMAT.md) +- [how the search works](https://github.com/sopherapps/scdb/tree/master/docs/HOW_INVERTED_INDEX_WORKS.md) ## Bindings @@ -220,53 +238,77 @@ On a average PC ``` cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz -BenchmarkStore_Clear/Clear-8 47697 26092 ns/op -BenchmarkStore_Clear/Clear_with_ttl:_3600-8 44049 26306 ns/op -BenchmarkStore_Compact/Compact-8 50 23524929 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_hey-8 507656 2210 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_hi-8 510031 4315 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_salut-8 261019 4299 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_bonjour-8 271724 4404 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_hola-8 259988 4357 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_oi-8 267859 4361 ns/op -BenchmarkStore_DeleteWithoutTtl/Delete_key_mulimuta-8 250986 5283 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_hey-8 490582 2264 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_hi-8 268174 4277 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_salut-8 269565 4300 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_bonjour-8 272019 4307 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_hola-8 257732 4287 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_oi-8 271117 4249 ns/op -BenchmarkStore_DeleteWithTtl/Delete_key_mulimuta-8 276910 4215 ns/op -BenchmarkStore_GetWithoutTtl/Get_hey-8 7362078 161.7 ns/op -BenchmarkStore_GetWithoutTtl/Get_hi-8 7282934 159.3 ns/op -BenchmarkStore_GetWithoutTtl/Get_salut-8 7472350 158.3 ns/op 
-BenchmarkStore_GetWithoutTtl/Get_bonjour-8 7422486 160.3 ns/op -BenchmarkStore_GetWithoutTtl/Get_hola-8 7516166 158.0 ns/op -BenchmarkStore_GetWithoutTtl/Get_oi-8 7470390 159.1 ns/op -BenchmarkStore_GetWithoutTtl/Get_mulimuta-8 7591210 156.5 ns/op -BenchmarkStore_GetWithTtl/Get_hey-8 4966746 241.0 ns/op -BenchmarkStore_GetWithTtl/Get_hi-8 4985324 239.3 ns/op -BenchmarkStore_GetWithTtl/Get_salut-8 4951470 239.2 ns/op -BenchmarkStore_GetWithTtl/Get_bonjour-8 4941150 239.8 ns/op -BenchmarkStore_GetWithTtl/Get_hola-8 4962139 239.5 ns/op -BenchmarkStore_GetWithTtl/Get_oi-8 4968688 239.6 ns/op -BenchmarkStore_GetWithTtl/Get_mulimuta-8 5011912 237.9 ns/op -BenchmarkStore_SetWithoutTtl/Set_hey_English-8 243872 5192 ns/op -BenchmarkStore_SetWithoutTtl/Set_hi_English-8 163945 6830 ns/op -BenchmarkStore_SetWithoutTtl/Set_salut_French-8 174580 6806 ns/op -BenchmarkStore_SetWithoutTtl/Set_bonjour_French-8 171808 6840 ns/op -BenchmarkStore_SetWithoutTtl/Set_hola_Spanish-8 160465 6927 ns/op -BenchmarkStore_SetWithoutTtl/Set_oi_Portuguese-8 169678 6841 ns/op -BenchmarkStore_SetWithoutTtl/Set_mulimuta_Runyoro-8 171794 6791 ns/op -BenchmarkStore_SetWithTtl/Set_hey_English-8 236829 5312 ns/op -BenchmarkStore_SetWithTtl/Set_hi_English-8 169725 6874 ns/op -BenchmarkStore_SetWithTtl/Set_salut_French-8 173904 6847 ns/op -BenchmarkStore_SetWithTtl/Set_bonjour_French-8 172405 6839 ns/op -BenchmarkStore_SetWithTtl/Set_hola_Spanish-8 171945 6827 ns/op -BenchmarkStore_SetWithTtl/Set_oi_Portuguese-8 171859 6852 ns/op -BenchmarkStore_SetWithTtl/Set_mulimuta_Runyoro-8 171380 6845 ns/op +BenchmarkStore_Clear/Clear-8 12363 126526 ns/op +BenchmarkStore_Clear/Clear_with_ttl:_3600-8 13052 89014 ns/op +BenchmarkStore_Compact/Compact-8 52 23302258 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_hey-8 505140 3094 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_hi-8 245188 4587 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_salut-8 260808 4530 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_bonjour-8 259333 4697 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_hola-8 253994 4579 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_oi-8 260127 4552 ns/op +BenchmarkStore_DeleteWithoutTtl/Delete_key_mulimuta-8 259500 4551 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_hey-8 495697 3050 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_hi-8 265194 4796 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_salut-8 233242 4715 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_bonjour-8 261645 4521 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_hola-8 255002 4779 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_oi-8 247960 4761 ns/op +BenchmarkStore_DeleteWithTtl/Delete_key_mulimuta-8 245869 4810 ns/op +BenchmarkStore_GetWithoutTtl/Get_hey-8 6655038 185.4 ns/op +BenchmarkStore_GetWithoutTtl/Get_hi-8 6674360 181.5 ns/op +BenchmarkStore_GetWithoutTtl/Get_salut-8 6404012 204.9 ns/op +BenchmarkStore_GetWithoutTtl/Get_bonjour-8 6227780 185.7 ns/op +BenchmarkStore_GetWithoutTtl/Get_hola-8 6207739 184.4 ns/op +BenchmarkStore_GetWithoutTtl/Get_oi-8 6102019 188.5 ns/op +BenchmarkStore_GetWithoutTtl/Get_mulimuta-8 6649304 184.0 ns/op +BenchmarkStore_GetWithTtl/Get_hey-8 4420294 273.9 ns/op +BenchmarkStore_GetWithTtl/Get_hi-8 4404975 268.1 ns/op +BenchmarkStore_GetWithTtl/Get_salut-8 3829527 280.7 ns/op +BenchmarkStore_GetWithTtl/Get_bonjour-8 4427978 268.8 ns/op +BenchmarkStore_GetWithTtl/Get_hola-8 4660736 258.8 ns/op +BenchmarkStore_GetWithTtl/Get_oi-8 4547602 265.8 ns/op +BenchmarkStore_GetWithTtl/Get_mulimuta-8 4750611 249.1 ns/op 
+BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_f-8 81596 14615 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_fo-8 71950 15022 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_foo-8 110924 11228 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_for-8 161625 7348 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_b-8 101258 11272 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_ba-8 112938 11045 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_bar-8 171814 7295 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_ban-8 163743 7187 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_pigg-8 234506 4902 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_p-8 178639 6935 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_pi-8 180256 7168 ns/op +BenchmarkStore_SearchWithoutPagination/Search_(no_pagination)_pig-8 167142 7267 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_f-8 86421 13569 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_fo-8 77089 13472 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_foo-8 128644 8989 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_for-8 258955 4672 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_b-8 139004 8836 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_ba-8 136581 8899 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_bar-8 245930 5010 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_ban-8 253870 4970 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_pigg-8 256216 4833 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_p-8 257278 4863 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_pi-8 254498 4831 ns/op +BenchmarkStore_SearchWithPagination/Search_(paginated)_pig-8 259162 4754 ns/op +BenchmarkStore_SetWithoutTtl/Set_hey_English-8 52761 23906 ns/op +BenchmarkStore_SetWithoutTtl/Set_hi_English-8 43544 28114 ns/op +BenchmarkStore_SetWithoutTtl/Set_salut_French-8 35671 34184 ns/op +BenchmarkStore_SetWithoutTtl/Set_bonjour_French-8 35151 33110 ns/op +BenchmarkStore_SetWithoutTtl/Set_hola_Spanish-8 33321 36255 ns/op +BenchmarkStore_SetWithoutTtl/Set_oi_Portuguese-8 49029 24633 ns/op +BenchmarkStore_SetWithoutTtl/Set_mulimuta_Runyoro-8 36476 32611 ns/op +BenchmarkStore_SetWithTtl/Set_hey_English-8 51962 24385 ns/op +BenchmarkStore_SetWithTtl/Set_hi_English-8 39193 28665 ns/op +BenchmarkStore_SetWithTtl/Set_salut_French-8 33957 33743 ns/op +BenchmarkStore_SetWithTtl/Set_bonjour_French-8 31314 35946 ns/op +BenchmarkStore_SetWithTtl/Set_hola_Spanish-8 28106 40356 ns/op +BenchmarkStore_SetWithTtl/Set_oi_Portuguese-8 43882 25837 ns/op +BenchmarkStore_SetWithTtl/Set_mulimuta_Runyoro-8 36912 33885 ns/op PASS -ok github.com/sopherapps/go-scdb/scdb 60.630s +ok github.com/sopherapps/go-scdb/scdb 100.150s ``` ## Acknowledgements From 1b8fb4f26aac6480b40e2b619585675c7d20426e Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 20:01:44 +0300 Subject: [PATCH 12/15] Update CHANGELOG.md --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1792683..d04cdc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/). 
## [Unreleased] +## [0.1.0] - 2023-01-14 + +### Added + +- Added full-text search for keys, with pagination using `store.Search(term, skip, limit)` + +### Changed + +- Changed the `scdb::New()` signature to include `maxIndexKeyLen` option. + +### Fixed + ## [0.0.7] - 2022-11-9 ### Added From 29f5fdb640221a0b2df04ec69a2f19e5418d0dbf Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 20:04:32 +0300 Subject: [PATCH 13/15] Fix example --- examples/sample.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/examples/sample.go b/examples/sample.go index 4785a4d..c1c75c2 100644 --- a/examples/sample.go +++ b/examples/sample.go @@ -21,13 +21,15 @@ func main() { var redundantBlocks uint16 = 1 var poolCapacity uint64 = 10 var compactionInterval uint32 = 1_800 + var maxIndexKeyLen uint32 = 3 store, err := scdb.New( "db", &maxKeys, &redundantBlocks, &poolCapacity, - &compactionInterval) + &compactionInterval, + &maxIndexKeyLen) if err != nil { log.Fatalf("error opening store: %s", err) } @@ -77,6 +79,20 @@ func main() { fmt.Printf("Key: %s, Value: %s", k, value) } + // searching: without pagination + kvs, err := store.Search([]byte("h"), 0, 0) + if err != nil { + log.Fatalf("error searching 'h': %s", err) + } + fmt.Printf("\nno pagination: %v", kvs) + + // searching with pagination: get last two + kvs, err = store.Search([]byte("h"), 2, 2) + if err != nil { + log.Fatalf("error searching (paginated) 'h': %s", err) + } + fmt.Printf("\nskip 2, limit 2: %v", kvs) + // deleting for k := range records { err := store.Delete([]byte(k)) From 3926e50d0a5ab292ba7c9283744c33129f9639a9 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 20:13:38 +0300 Subject: [PATCH 14/15] Format basing on go vet --- scdb/store_test.go | 76 +++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/scdb/store_test.go b/scdb/store_test.go index 77f7b4a..8846a2c 100644 --- a/scdb/store_test.go +++ b/scdb/store_test.go @@ -94,19 +94,19 @@ func TestStore_Search(t *testing.T) { t.Run("SearchWithoutPaginationReturnsAllMatchedKeyValues", func(t *testing.T) { table := []testParams{ - {[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("foo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("food"), 0, 0, []buffers.KeyValuePair{{[]byte("food"), []byte("lug")}}}, - {[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, - {[]byte("b"), 0, 0, []buffers.KeyValuePair{{[]byte("bar"), []byte("port")}, {[]byte("band"), []byte("nyoro")}}}, - {[]byte("ba"), 0, 0, []buffers.KeyValuePair{{[]byte("bar"), []byte("port")}, {[]byte("band"), []byte("nyoro")}}}, - {[]byte("bar"), 0, 0, []buffers.KeyValuePair{{[]byte("bar"), []byte("port")}}}, - {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, - {[]byte("band"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, - {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("f"), 0, 0, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, 
{K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, {K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("foo"), 0, 0, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("food"), 0, 0, []buffers.KeyValuePair{{K: []byte("food"), V: []byte("lug")}}}, + {[]byte("for"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, + {[]byte("b"), 0, 0, []buffers.KeyValuePair{{K: []byte("bar"), V: []byte("port")}, {K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("ba"), 0, 0, []buffers.KeyValuePair{{K: []byte("bar"), V: []byte("port")}, {K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("bar"), 0, 0, []buffers.KeyValuePair{{K: []byte("bar"), V: []byte("port")}}}, + {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("band"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("p"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bare"), 0, 0, []buffers.KeyValuePair{}}, @@ -125,15 +125,15 @@ func TestStore_Search(t *testing.T) { t.Run("SearchWithPaginationSkipsSomeAndReturnsNotMoreThanLimit", func(t *testing.T) { table := []testParams{ - {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 0, 8, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 1, 8, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 1, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 0, 2, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}, {[]byte("fore"), []byte("span")}}}, - {[]byte("fo"), 1, 2, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}, {[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 0, 1, []buffers.KeyValuePair{{[]byte("foo"), []byte("eng")}}}, - {[]byte("fo"), 2, 1, []buffers.KeyValuePair{{[]byte("food"), []byte("lug")}}}, - {[]byte("fo"), 1, 1, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, {K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 0, 8, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, {K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 1, 8, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 1, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 0, 2, []buffers.KeyValuePair{{K: []byte("foo"), V: []byte("eng")}, {K: []byte("fore"), V: []byte("span")}}}, + {[]byte("fo"), 1, 2, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}, {K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 0, 1, []buffers.KeyValuePair{{K: []byte("foo"), V: 
[]byte("eng")}}}, + {[]byte("fo"), 2, 1, []buffers.KeyValuePair{{K: []byte("food"), V: []byte("lug")}}}, + {[]byte("fo"), 1, 1, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, } insertRecords(t, store, SEARCH_RECORDS, nil) @@ -149,18 +149,18 @@ func TestStore_Search(t *testing.T) { t.Run("SearchAfterExpirationReturnsNoExpiredKeysValues", func(t *testing.T) { table := []testParams{ - {[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, - {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("f"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, {[]byte("foo"), 0, 0, []buffers.KeyValuePair{}}, - {[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, - {[]byte("b"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, - {[]byte("ba"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, + {[]byte("for"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, + {[]byte("b"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("ba"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, {[]byte("bar"), 0, 0, []buffers.KeyValuePair{}}, - {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, - {[]byte("band"), 0, 0, []buffers.KeyValuePair{{[]byte("band"), []byte("nyoro")}}}, - {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + {[]byte("ban"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("band"), 0, 0, []buffers.KeyValuePair{{K: []byte("band"), V: []byte("nyoro")}}}, + {[]byte("p"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("food"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, @@ -185,18 +185,18 @@ func TestStore_Search(t *testing.T) { t.Run("SearchAfterDeleteReturnsNoDeletedKeyValues", func(t *testing.T) { table := []testParams{ - {[]byte("f"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, - {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("f"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, + {[]byte("fo"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, {[]byte("foo"), 0, 0, []buffers.KeyValuePair{}}, - {[]byte("for"), 0, 0, []buffers.KeyValuePair{{[]byte("fore"), []byte("span")}}}, + {[]byte("for"), 0, 0, []buffers.KeyValuePair{{K: []byte("fore"), V: []byte("span")}}}, {[]byte("b"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("ba"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bar"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("ban"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("band"), 0, 0, []buffers.KeyValuePair{}}, - {[]byte("p"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, - {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{[]byte("pig"), []byte("dan")}}}, + 
{[]byte("p"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pi"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, + {[]byte("pig"), 0, 0, []buffers.KeyValuePair{{K: []byte("pig"), V: []byte("dan")}}}, {[]byte("pigg"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("food"), 0, 0, []buffers.KeyValuePair{}}, {[]byte("bandana"), 0, 0, []buffers.KeyValuePair{}}, From a6b6983d8cbd937dd52b2981e73752e62ff59b28 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 14 Jan 2023 20:14:41 +0300 Subject: [PATCH 15/15] Fix typo in CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d04cdc8..cbb7c45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Changed -- Changed the `scdb::New()` signature to include `maxIndexKeyLen` option. +- Changed the `scdb.New()` signature to include `maxIndexKeyLen` option. ### Fixed