Some checks failed
Go Tests / Run Tests (1.24.2) (push) Has been cancelled
Adds a complete LSM-based storage engine with these features: - Single-writer architecture for the storage engine - WAL for durability (and hey, it's configurable) - MemTable with a skip list implementation for fast reads and writes - SSTable with a block-based structure for on-disk, level-based storage - Background compaction with a tiered strategy - ACID transactions - Good documentation (I hope)
377 lines
9.3 KiB
Go
377 lines
9.3 KiB
Go
package sstable
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"github.com/jer/kevo/pkg/sstable/block"
|
|
)
|
|
|
|
// Iterator iterates over key-value pairs in an SSTable
|
|
type Iterator struct {
|
|
reader *Reader
|
|
indexIterator *block.Iterator
|
|
dataBlockIter *block.Iterator
|
|
currentBlock *block.Reader
|
|
err error
|
|
initialized bool
|
|
mu sync.Mutex
|
|
}
|
|
|
|
// SeekToFirst positions the iterator at the first key
|
|
func (it *Iterator) SeekToFirst() {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
// Reset error state
|
|
it.err = nil
|
|
|
|
// Position index iterator at the first entry
|
|
it.indexIterator.SeekToFirst()
|
|
|
|
// Load the first valid data block
|
|
if it.indexIterator.Valid() {
|
|
// Skip invalid entries
|
|
if len(it.indexIterator.Value()) < 8 {
|
|
it.skipInvalidIndexEntries()
|
|
}
|
|
|
|
if it.indexIterator.Valid() {
|
|
// Load the data block
|
|
it.loadCurrentDataBlock()
|
|
|
|
// Position the data block iterator at the first key
|
|
if it.dataBlockIter != nil {
|
|
it.dataBlockIter.SeekToFirst()
|
|
}
|
|
}
|
|
}
|
|
|
|
if !it.indexIterator.Valid() || it.dataBlockIter == nil {
|
|
// No valid index entries
|
|
it.resetBlockIterator()
|
|
}
|
|
|
|
it.initialized = true
|
|
}
|
|
|
|
// SeekToLast positions the iterator at the last key
|
|
func (it *Iterator) SeekToLast() {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
// Reset error state
|
|
it.err = nil
|
|
|
|
// Find the last unique block by tracking all seen blocks
|
|
lastBlockOffset, lastBlockValid := it.findLastUniqueBlockOffset()
|
|
|
|
// Position index at an entry pointing to the last block
|
|
if lastBlockValid {
|
|
it.indexIterator.SeekToFirst()
|
|
for it.indexIterator.Valid() {
|
|
if len(it.indexIterator.Value()) >= 8 {
|
|
blockOffset := binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
if blockOffset == lastBlockOffset {
|
|
break
|
|
}
|
|
}
|
|
it.indexIterator.Next()
|
|
}
|
|
|
|
// Load the last data block
|
|
it.loadCurrentDataBlock()
|
|
|
|
// Position the data block iterator at the last key
|
|
if it.dataBlockIter != nil {
|
|
it.dataBlockIter.SeekToLast()
|
|
}
|
|
} else {
|
|
// No valid index entries
|
|
it.resetBlockIterator()
|
|
}
|
|
|
|
it.initialized = true
|
|
}
|
|
|
|
// Seek positions the iterator at the first key >= target
|
|
func (it *Iterator) Seek(target []byte) bool {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
// Reset error state
|
|
it.err = nil
|
|
it.initialized = true
|
|
|
|
// Find the block that might contain the key
|
|
// The index contains the first key of each block
|
|
if !it.indexIterator.Seek(target) {
|
|
// If seeking in the index fails, try the last block
|
|
it.indexIterator.SeekToLast()
|
|
if !it.indexIterator.Valid() {
|
|
// No blocks in the SSTable
|
|
it.resetBlockIterator()
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Load the data block at the current index position
|
|
it.loadCurrentDataBlock()
|
|
if it.dataBlockIter == nil {
|
|
return false
|
|
}
|
|
|
|
// Try to find the target key in this block
|
|
if it.dataBlockIter.Seek(target) {
|
|
// Found a key >= target in this block
|
|
return true
|
|
}
|
|
|
|
// If we didn't find the key in this block, it might be in a later block
|
|
return it.seekInNextBlocks()
|
|
}
|
|
|
|
// Next advances the iterator to the next key
|
|
func (it *Iterator) Next() bool {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
if !it.initialized {
|
|
it.SeekToFirst()
|
|
return it.Valid()
|
|
}
|
|
|
|
if it.dataBlockIter == nil {
|
|
// If we don't have a current block, attempt to load the one at the current index position
|
|
if it.indexIterator.Valid() {
|
|
it.loadCurrentDataBlock()
|
|
if it.dataBlockIter != nil {
|
|
it.dataBlockIter.SeekToFirst()
|
|
return it.dataBlockIter.Valid()
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Try to advance within current block
|
|
if it.dataBlockIter.Next() {
|
|
// Successfully moved to the next entry in the current block
|
|
return true
|
|
}
|
|
|
|
// We've reached the end of the current block, so try to move to the next block
|
|
return it.advanceToNextBlock()
|
|
}
|
|
|
|
// Key returns the current key
|
|
func (it *Iterator) Key() []byte {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
if !it.initialized || it.dataBlockIter == nil || !it.dataBlockIter.Valid() {
|
|
return nil
|
|
}
|
|
return it.dataBlockIter.Key()
|
|
}
|
|
|
|
// Value returns the current value
|
|
func (it *Iterator) Value() []byte {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
if !it.initialized || it.dataBlockIter == nil || !it.dataBlockIter.Valid() {
|
|
return nil
|
|
}
|
|
return it.dataBlockIter.Value()
|
|
}
|
|
|
|
// Valid returns true if the iterator is positioned at a valid entry
|
|
func (it *Iterator) Valid() bool {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
return it.initialized && it.dataBlockIter != nil && it.dataBlockIter.Valid()
|
|
}
|
|
|
|
// IsTombstone returns true if the current entry is a deletion marker
|
|
func (it *Iterator) IsTombstone() bool {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
// Not valid means not a tombstone
|
|
if !it.initialized || it.dataBlockIter == nil || !it.dataBlockIter.Valid() {
|
|
return false
|
|
}
|
|
|
|
// For SSTable iterators, a nil value always represents a tombstone
|
|
// The block iterator's Value method will return nil for tombstones
|
|
return it.dataBlockIter.Value() == nil
|
|
}
|
|
|
|
// Error returns any error encountered during iteration
|
|
func (it *Iterator) Error() error {
|
|
it.mu.Lock()
|
|
defer it.mu.Unlock()
|
|
|
|
return it.err
|
|
}
|
|
|
|
// Helper methods for common operations
|
|
|
|
// resetBlockIterator resets current block and iterator
|
|
func (it *Iterator) resetBlockIterator() {
|
|
it.currentBlock = nil
|
|
it.dataBlockIter = nil
|
|
}
|
|
|
|
// skipInvalidIndexEntries advances the index iterator past any invalid entries
|
|
func (it *Iterator) skipInvalidIndexEntries() {
|
|
for it.indexIterator.Next() {
|
|
if len(it.indexIterator.Value()) >= 8 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// findLastUniqueBlockOffset scans the index to find the offset of the last unique block
|
|
func (it *Iterator) findLastUniqueBlockOffset() (uint64, bool) {
|
|
seenBlocks := make(map[uint64]bool)
|
|
var lastBlockOffset uint64
|
|
var lastBlockValid bool
|
|
|
|
// Position index iterator at the first entry
|
|
it.indexIterator.SeekToFirst()
|
|
|
|
// Scan through all blocks to find the last unique one
|
|
for it.indexIterator.Valid() {
|
|
if len(it.indexIterator.Value()) >= 8 {
|
|
blockOffset := binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
if !seenBlocks[blockOffset] {
|
|
seenBlocks[blockOffset] = true
|
|
lastBlockOffset = blockOffset
|
|
lastBlockValid = true
|
|
}
|
|
}
|
|
it.indexIterator.Next()
|
|
}
|
|
|
|
return lastBlockOffset, lastBlockValid
|
|
}
|
|
|
|
// seekInNextBlocks attempts to find the target key in subsequent blocks
|
|
func (it *Iterator) seekInNextBlocks() bool {
|
|
var foundValidKey bool
|
|
|
|
// Store current block offset to skip duplicates
|
|
var currentBlockOffset uint64
|
|
if len(it.indexIterator.Value()) >= 8 {
|
|
currentBlockOffset = binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
}
|
|
|
|
// Try subsequent blocks, skipping duplicates
|
|
for it.indexIterator.Next() {
|
|
// Skip invalid entries or duplicates of the current block
|
|
if !it.indexIterator.Valid() || len(it.indexIterator.Value()) < 8 {
|
|
continue
|
|
}
|
|
|
|
nextBlockOffset := binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
if nextBlockOffset == currentBlockOffset {
|
|
// This is a duplicate index entry pointing to the same block, skip it
|
|
continue
|
|
}
|
|
|
|
// Found a new block, update current offset
|
|
currentBlockOffset = nextBlockOffset
|
|
|
|
it.loadCurrentDataBlock()
|
|
if it.dataBlockIter == nil {
|
|
return false
|
|
}
|
|
|
|
// Position at the first key in the next block
|
|
it.dataBlockIter.SeekToFirst()
|
|
if it.dataBlockIter.Valid() {
|
|
foundValidKey = true
|
|
break
|
|
}
|
|
}
|
|
|
|
return foundValidKey
|
|
}
|
|
|
|
// advanceToNextBlock moves to the next unique block
|
|
func (it *Iterator) advanceToNextBlock() bool {
|
|
// Store the current block's offset to find the next unique block
|
|
var currentBlockOffset uint64
|
|
if len(it.indexIterator.Value()) >= 8 {
|
|
currentBlockOffset = binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
}
|
|
|
|
// Find next block with a different offset
|
|
nextBlockFound := it.findNextUniqueBlock(currentBlockOffset)
|
|
|
|
if !nextBlockFound || !it.indexIterator.Valid() {
|
|
// No more unique blocks in the index
|
|
it.resetBlockIterator()
|
|
return false
|
|
}
|
|
|
|
// Load the next block
|
|
it.loadCurrentDataBlock()
|
|
if it.dataBlockIter == nil {
|
|
return false
|
|
}
|
|
|
|
// Start at the beginning of the new block
|
|
it.dataBlockIter.SeekToFirst()
|
|
return it.dataBlockIter.Valid()
|
|
}
|
|
|
|
// findNextUniqueBlock advances the index iterator to find a block with a different offset
|
|
func (it *Iterator) findNextUniqueBlock(currentBlockOffset uint64) bool {
|
|
for it.indexIterator.Next() {
|
|
// Skip invalid entries or entries pointing to the same block
|
|
if !it.indexIterator.Valid() || len(it.indexIterator.Value()) < 8 {
|
|
continue
|
|
}
|
|
|
|
nextBlockOffset := binary.LittleEndian.Uint64(it.indexIterator.Value()[:8])
|
|
if nextBlockOffset != currentBlockOffset {
|
|
// Found a new block
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// loadCurrentDataBlock loads the data block at the current index iterator position
|
|
func (it *Iterator) loadCurrentDataBlock() {
|
|
// Check if index iterator is valid
|
|
if !it.indexIterator.Valid() {
|
|
it.resetBlockIterator()
|
|
it.err = fmt.Errorf("index iterator not valid")
|
|
return
|
|
}
|
|
|
|
// Parse block location from index value
|
|
locator, err := ParseBlockLocator(it.indexIterator.Key(), it.indexIterator.Value())
|
|
if err != nil {
|
|
it.err = fmt.Errorf("failed to parse block locator: %w", err)
|
|
it.resetBlockIterator()
|
|
return
|
|
}
|
|
|
|
// Fetch the block using the reader's block fetcher
|
|
blockReader, err := it.reader.blockFetcher.FetchBlock(locator.Offset, locator.Size)
|
|
if err != nil {
|
|
it.err = fmt.Errorf("failed to fetch block: %w", err)
|
|
it.resetBlockIterator()
|
|
return
|
|
}
|
|
|
|
it.currentBlock = blockReader
|
|
it.dataBlockIter = blockReader.Iterator()
|
|
}
|