kevo/pkg/common/iterator/composite/hierarchical.go
Jeremy Tregunna a0a1c0512f
All checks were successful
Go Tests / Run Tests (1.24.2) (push) Successful in 9m50s
chore: formatting
2025-04-22 14:09:54 -06:00

291 lines
6.7 KiB
Go

package composite
import (
"bytes"
"sync"
"github.com/KevoDB/kevo/pkg/common/iterator"
)
// HierarchicalIterator merges several source iterators into a single view that
// follows the LSM-tree hierarchy. Sources that appear earlier in the slice are
// newer and shadow older ones: when the same key is present in more than one
// source, the value from the newest source is the one reported.
type HierarchicalIterator struct {
	// iterators holds the sources, ordered from newest to oldest.
	iterators []iterator.Iterator

	// key and value cache the entry the iterator is currently positioned on.
	key, value []byte

	// valid reports whether key/value describe a real entry.
	valid bool

	// mu is held by the positioning and accessor methods while they touch the
	// cached position (key, value, valid).
	mu sync.RWMutex
}
// NewHierarchicalIterator builds a HierarchicalIterator over the given sources.
// The slice must be ordered newest-to-oldest. It is stored as-is (not copied),
// and the returned iterator starts out invalid until it is positioned with one
// of the seek methods.
func NewHierarchicalIterator(iterators []iterator.Iterator) *HierarchicalIterator {
	h := new(HierarchicalIterator)
	h.iterators = iterators
	return h
}
// SeekToFirst rewinds every source and positions the iterator on the smallest
// key available across all of them.
func (h *HierarchicalIterator) SeekToFirst() {
	h.mu.Lock()
	defer h.mu.Unlock()

	// Rewind each source to its own first entry.
	for idx := range h.iterators {
		h.iterators[idx].SeekToFirst()
	}

	// A nil previous key asks the helper for the very first key overall.
	h.findNextUniqueKey(nil)
}
// SeekToLast moves every source to its last entry and positions the iterator
// on the largest key found across all sources. If several sources end on that
// same key, the newest source (lowest index) supplies the value. The iterator
// becomes invalid when no source has any entry.
func (h *HierarchicalIterator) SeekToLast() {
	h.mu.Lock()
	defer h.mu.Unlock()

	// Position all sources at their own last key.
	for _, iter := range h.iterators {
		iter.SeekToLast()
	}

	var maxKey, maxValue []byte
	maxSource := -1
	for i, iter := range h.iterators {
		if !iter.Valid() {
			continue
		}

		key := iter.Key()
		// The source index, not maxKey == nil, marks "nothing chosen yet".
		// A valid entry may legitimately carry a nil/empty key, and testing
		// the key for nil would let an older source overwrite the newer
		// source's value for that same key.
		//
		// The strict comparison keeps the first (newest) source on ties.
		if maxSource == -1 || bytes.Compare(key, maxKey) > 0 {
			maxKey = key
			maxValue = iter.Value()
			maxSource = i
		}
	}

	if maxSource == -1 {
		h.valid = false
		return
	}

	h.key = maxKey
	h.value = maxValue
	h.valid = true
}
// Seek positions the iterator at the smallest key >= target across all sources
// and reports whether such a key exists. When several sources hold that key,
// the newest source (lowest index) supplies the value. If no source has a key
// >= target, the iterator becomes invalid and Seek returns false.
func (h *HierarchicalIterator) Seek(target []byte) bool {
	h.mu.Lock()
	defer h.mu.Unlock()

	// Seek every source to the target.
	for _, iter := range h.iterators {
		iter.Seek(target)
	}

	h.valid = false

	var bestKey, bestValue []byte
	found := false
	for _, iter := range h.iterators {
		if !iter.Valid() {
			continue
		}

		key := iter.Key()
		// Ignore sources that are positioned before the target.
		if bytes.Compare(key, target) < 0 {
			continue
		}

		// Sources are visited newest-to-oldest and a candidate is replaced
		// only by a strictly smaller key, so on equal keys the newest source
		// is the one retained. No separate pass over newer sources is needed.
		if !found || bytes.Compare(key, bestKey) < 0 {
			bestKey = key
			bestValue = iter.Value()
			found = true
		}
	}

	if !found {
		return false
	}

	h.key = bestKey
	h.value = bestValue
	h.valid = true
	return true
}
// Next advances the iterator to the smallest key strictly greater than the
// current one and reports whether such a key exists. It returns false without
// moving if the iterator is not currently valid.
func (h *HierarchicalIterator) Next() bool {
	h.mu.Lock()
	defer h.mu.Unlock()

	if !h.valid {
		return false
	}

	// Remember the current key so entries with the same key are skipped.
	prevKey := h.key
	if prevKey == nil {
		// findNextUniqueKey treats a nil previous key as "find the first key"
		// and advances nothing. If the current entry's key is nil (an empty
		// key reported as nil by a source), passing it through unchanged would
		// re-select the same entry on every call. An empty non-nil slice
		// compares equal to the nil key but is honored as a real previous key.
		prevKey = []byte{}
	}

	return h.findNextUniqueKey(prevKey)
}
// Key returns the key of the entry the iterator is positioned on, or nil when
// the iterator is not valid.
func (h *HierarchicalIterator) Key() []byte {
	h.mu.RLock()
	defer h.mu.RUnlock()

	if h.valid {
		return h.key
	}
	return nil
}
// Value returns the value of the entry the iterator is positioned on, or nil
// when the iterator is not valid.
func (h *HierarchicalIterator) Value() []byte {
	h.mu.RLock()
	defer h.mu.RUnlock()

	if h.valid {
		return h.value
	}
	return nil
}
// Valid reports whether the iterator is currently positioned on an entry.
func (h *HierarchicalIterator) Valid() bool {
	h.mu.RLock()
	positioned := h.valid
	h.mu.RUnlock()

	return positioned
}
// IsTombstone reports whether the current entry is a deletion marker.
// The hierarchical iterator has no explicit marker of its own: an entry is
// considered a tombstone when the iterator is valid and the stored value is
// nil. An invalid iterator is never a tombstone.
func (h *HierarchicalIterator) IsTombstone() bool {
	h.mu.RLock()
	defer h.mu.RUnlock()

	return h.valid && h.value == nil
}
// NumSources reports how many source iterators this iterator merges.
func (h *HierarchicalIterator) NumSources() int {
	count := len(h.iterators)
	return count
}
// GetSourceIterators exposes the underlying source iterators in
// newest-to-oldest order. The internal slice itself is returned, not a copy.
func (h *HierarchicalIterator) GetSourceIterators() []iterator.Iterator {
	sources := h.iterators
	return sources
}
// findNextUniqueKey positions the iterator on the smallest key strictly
// greater than prevKey across all sources, advancing sources past prevKey as
// needed. If prevKey is nil, no source is advanced and the smallest current
// key is selected (used to find the first key). When several sources hold the
// selected key, the newest source (lowest index) supplies the value.
//
// It returns true and marks the iterator valid if a key was found; otherwise
// the iterator is left invalid and false is returned. The caller must hold
// h.mu for writing.
func (h *HierarchicalIterator) findNextUniqueKey(prevKey []byte) bool {
	h.valid = false

	var bestKey, bestValue []byte
	found := false
	for _, iter := range h.iterators {
		if !iter.Valid() {
			continue
		}

		if prevKey != nil {
			// Move this source forward until it is past prevKey.
			for iter.Valid() && bytes.Compare(iter.Key(), prevKey) <= 0 {
				if !iter.Next() {
					break
				}
			}
			// Skip sources that are exhausted. Also skip a source that could
			// not advance yet still reports a key <= prevKey, so that an
			// already-returned key is never emitted again.
			if !iter.Valid() || bytes.Compare(iter.Key(), prevKey) <= 0 {
				continue
			}
		}

		key := iter.Key()
		// Sources are visited newest-to-oldest and a candidate is replaced
		// only by a strictly smaller key, so on equal keys the newest source
		// is the one retained. No separate pass over newer sources is needed.
		if !found || bytes.Compare(key, bestKey) < 0 {
			bestKey = key
			bestValue = iter.Value()
			found = true
		}
	}

	if !found {
		return false
	}

	h.key = bestKey
	h.value = bestValue
	h.valid = true
	return true
}