refactor: optimize two pass deduping into a single pass across the codebase

refactor: use memtable sorted order iterator to write and dedup in a single pass
fix: flushing a memtable dedups keys as its processing them before creating the SSTable
2025-04-21 16:27:54 -06:00 · 2025-04-21 14:53:03 -06:00 · 2025-04-21 14:53:03 -06:00
4 changed files with 248 additions and 161 deletions
--- a/pkg/common/iterator/composite/hierarchical.go
+++ b/pkg/common/iterator/composite/hierarchical.go
@ -94,48 +94,54 @@ func (h *HierarchicalIterator) Seek(target []byte) bool {
 		iter.Seek(target)
 	}

-	// For seek, we need to treat it differently than findNextUniqueKey since we want
-	// keys >= target, not strictly > target
-	var minKey []byte
-	var minValue []byte
-	var seenKeys = make(map[string]bool)
+	// For seek, we need to find the smallest key >= target
+	var bestKey []byte
+	var bestValue []byte
+	var bestIterIdx int = -1
 	h.valid = false

-	// Find the smallest key >= target from all iterators
-	for _, iter := range h.iterators {
+	// First pass: find the smallest key >= target
+	for i, iter := range h.iterators {
 		if !iter.Valid() {
 			continue
 		}

 		key := iter.Key()
-		value := iter.Value()

 		// Skip keys < target (Seek should return keys >= target)
 		if bytes.Compare(key, target) < 0 {
 			continue
 		}

-		// Convert key to string for map lookup
-		keyStr := string(key)
-
-		// Only use this key if we haven't seen it from a newer iterator
-		if !seenKeys[keyStr] {
-			// Mark as seen
-			seenKeys[keyStr] = true
-
-			// Update min key if needed
-			if minKey == nil || bytes.Compare(key, minKey) < 0 {
-				minKey = key
-				minValue = value
-				h.valid = true
-			}
+		// If we haven't found a valid key yet, or this key is smaller than the current best key
+		if bestIterIdx == -1 || bytes.Compare(key, bestKey) < 0 {
+			// This becomes our best candidate so far
+			bestKey = key
+			bestValue = iter.Value()
+			bestIterIdx = i
 		}
 	}

-	// Set the found key/value
-	if h.valid {
-		h.key = minKey
-		h.value = minValue
+	// Now we need to check if any newer iterators have the same key
+	if bestIterIdx != -1 {
+		// Check all newer iterators (earlier in the slice) for the same key
+		for i := 0; i < bestIterIdx; i++ {
+			iter := h.iterators[i]
+			if !iter.Valid() {
+				continue
+			}
+			
+			// If a newer iterator has the same key, use its value
+			if bytes.Equal(iter.Key(), bestKey) {
+				bestValue = iter.Value()
+				break  // Since iterators are in newest-to-oldest order, we can stop at the first match
+			}
+		}
+		
+		// Set the found key/value
+		h.key = bestKey
+		h.value = bestValue
+		h.valid = true
 		return true
 	}

@ -218,23 +224,20 @@ func (h *HierarchicalIterator) GetSourceIterators() []iterator.Iterator {
 // Returns true if a valid key was found
 func (h *HierarchicalIterator) findNextUniqueKey(prevKey []byte) bool {
 	// Find the smallest key among all iterators that is > prevKey
-	var minKey []byte
-	var minValue []byte
-	var seenKeys = make(map[string]bool)
+	var bestKey []byte
+	var bestValue []byte
+	var bestIterIdx int = -1
 	h.valid = false

-	// First pass: collect all valid keys and find min key > prevKey
-	for _, iter := range h.iterators {
+	// First pass: advance all iterators past prevKey and find the smallest next key
+	for i, iter := range h.iterators {
 		// Skip invalid iterators
 		if !iter.Valid() {
 			continue
 		}

-		key := iter.Key()
-		value := iter.Value()
-
 		// Skip keys <= prevKey if we're looking for the next key
-		if prevKey != nil && bytes.Compare(key, prevKey) <= 0 {
+		if prevKey != nil && bytes.Compare(iter.Key(), prevKey) <= 0 {
 			// Advance to find a key > prevKey
 			for iter.Valid() && bytes.Compare(iter.Key(), prevKey) <= 0 {
 				if !iter.Next() {
@ -246,38 +249,40 @@ func (h *HierarchicalIterator) findNextUniqueKey(prevKey []byte) bool {
 			if !iter.Valid() {
 				continue
 			}
-
-			// Get the new key after advancing
-			key = iter.Key()
-			value = iter.Value()
-
-			// If key is still <= prevKey after advancing, skip this iterator
-			if bytes.Compare(key, prevKey) <= 0 {
-				continue
-			}
 		}

-		// Convert key to string for map lookup
-		keyStr := string(key)
-
-		// If this key hasn't been seen before, or this is a newer source for the same key
-		if !seenKeys[keyStr] {
-			// Mark this key as seen - it's from the newest source
-			seenKeys[keyStr] = true
-
-			// Check if this is a new minimum key
-			if minKey == nil || bytes.Compare(key, minKey) < 0 {
-				minKey = key
-				minValue = value
-				h.valid = true
-			}
+		// Get the current key
+		key := iter.Key()
+		
+		// If we haven't found a valid key yet, or this key is smaller than the current best key
+		if bestIterIdx == -1 || bytes.Compare(key, bestKey) < 0 {
+			// This becomes our best candidate so far
+			bestKey = key
+			bestValue = iter.Value()
+			bestIterIdx = i
 		}
 	}

-	// Set the key/value if we found a valid one
-	if h.valid {
-		h.key = minKey
-		h.value = minValue
+	// Now we need to check if any newer iterators have the same key
+	if bestIterIdx != -1 {
+		// Check all newer iterators (earlier in the slice) for the same key
+		for i := 0; i < bestIterIdx; i++ {
+			iter := h.iterators[i]
+			if !iter.Valid() {
+				continue
+			}
+			
+			// If a newer iterator has the same key, use its value
+			if bytes.Equal(iter.Key(), bestKey) {
+				bestValue = iter.Value()
+				break  // Since iterators are in newest-to-oldest order, we can stop at the first match
+			}
+		}
+		
+		// Set the found key/value
+		h.key = bestKey
+		h.value = bestValue
+		h.valid = true
 		return true
 	}

--- a/pkg/engine/engine.go
+++ b/pkg/engine/engine.go
@ -510,11 +510,25 @@ func (e *Engine) flushMemTable(mem *memtable.MemTable) error {
 	count := 0
 	var bytesWritten uint64

-	// Write all entries to the SSTable
+	// Since memtable's skiplist returns keys in sorted order,
+	// but possibly with duplicates (newer versions of same key first),
+	// we need to track all processed keys (including tombstones)
+	var processedKeys = make(map[string]struct{})
+
 	for iter.SeekToFirst(); iter.Valid(); iter.Next() {
-		// Skip deletion markers, only add value entries
+		key := iter.Key()
+		keyStr := string(key) // Use as map key
+		
+		// Skip keys we've already processed (including tombstones)
+		if _, seen := processedKeys[keyStr]; seen {
+			continue
+		}
+		
+		// Mark this key as processed regardless of whether it's a value or tombstone
+		processedKeys[keyStr] = struct{}{}
+		
+		// Only write non-tombstone entries to the SSTable
 		if value := iter.Value(); value != nil {
-			key := iter.Key()
 			bytesWritten += uint64(len(key) + len(value))
 			if err := writer.Add(key, value); err != nil {
 				writer.Abort()
--- a/pkg/engine/engine_test.go
+++ b/pkg/engine/engine_test.go
@ -74,6 +74,128 @@ func TestEngine_BasicOperations(t *testing.T) {
 	}
 }

+func TestEngine_SameKeyMultipleOperationsFlush(t *testing.T) {
+	_, engine, cleanup := setupTest(t)
+	defer cleanup()
+
+	// Simulate exactly the bug scenario from the CLI
+	// Add the same key multiple times with different values
+	key := []byte("foo")
+	
+	// First add 
+	if err := engine.Put(key, []byte("23")); err != nil {
+		t.Fatalf("Failed to put first value: %v", err)
+	}
+
+	// Delete it
+	if err := engine.Delete(key); err != nil {
+		t.Fatalf("Failed to delete key: %v", err)
+	}
+	
+	// Add it again with different value
+	if err := engine.Put(key, []byte("42")); err != nil {
+		t.Fatalf("Failed to re-add key: %v", err)
+	}
+	
+	// Add another key
+	if err := engine.Put([]byte("bar"), []byte("23")); err != nil {
+		t.Fatalf("Failed to add another key: %v", err)
+	}
+	
+	// Add another key
+	if err := engine.Put([]byte("user:1"), []byte(`{"name":"John"}`)); err != nil {
+		t.Fatalf("Failed to add another key: %v", err)
+	}
+
+	// Verify before flush
+	value, err := engine.Get(key)
+	if err != nil {
+		t.Fatalf("Failed to get key before flush: %v", err)
+	}
+	if !bytes.Equal(value, []byte("42")) {
+		t.Errorf("Got incorrect value before flush. Expected: %s, Got: %s", "42", string(value))
+	}
+
+	// Force a flush of the memtable - this would have failed before the fix
+	tables := engine.memTablePool.GetMemTables()
+	if err := engine.flushMemTable(tables[0]); err != nil {
+		t.Fatalf("Error in flush with same key multiple operations: %v", err)
+	}
+
+	// Verify all keys after flush
+	value, err = engine.Get(key)
+	if err != nil {
+		t.Fatalf("Failed to get key after flush: %v", err)
+	}
+	if !bytes.Equal(value, []byte("42")) {
+		t.Errorf("Got incorrect value after flush. Expected: %s, Got: %s", "42", string(value))
+	}
+	
+	value, err = engine.Get([]byte("bar"))
+	if err != nil {
+		t.Fatalf("Failed to get 'bar' after flush: %v", err)
+	}
+	if !bytes.Equal(value, []byte("23")) {
+		t.Errorf("Got incorrect value for 'bar' after flush. Expected: %s, Got: %s", "23", string(value))
+	}
+	
+	value, err = engine.Get([]byte("user:1"))
+	if err != nil {
+		t.Fatalf("Failed to get 'user:1' after flush: %v", err)
+	}
+	if !bytes.Equal(value, []byte(`{"name":"John"}`)) {
+		t.Errorf("Got incorrect value for 'user:1' after flush. Expected: %s, Got: %s", `{"name":"John"}`, string(value))
+	}
+}
+
+func TestEngine_DuplicateKeysFlush(t *testing.T) {
+	_, engine, cleanup := setupTest(t)
+	defer cleanup()
+
+	// Test with a key that will be deleted and re-added multiple times
+	key := []byte("foo")
+	
+	// Add the key
+	if err := engine.Put(key, []byte("42")); err != nil {
+		t.Fatalf("Failed to put initial value: %v", err)
+	}
+
+	// Delete the key
+	if err := engine.Delete(key); err != nil {
+		t.Fatalf("Failed to delete key: %v", err)
+	}
+
+	// Re-add the key with a different value
+	if err := engine.Put(key, []byte("43")); err != nil {
+		t.Fatalf("Failed to re-add key: %v", err)
+	}
+
+	// Delete again
+	if err := engine.Delete(key); err != nil {
+		t.Fatalf("Failed to delete key again: %v", err)
+	}
+
+	// Re-add once more
+	if err := engine.Put(key, []byte("44")); err != nil {
+		t.Fatalf("Failed to re-add key again: %v", err)
+	}
+
+	// Force a flush of the memtable
+	tables := engine.memTablePool.GetMemTables()
+	if err := engine.flushMemTable(tables[0]); err != nil {
+		t.Fatalf("Error flushing with duplicate keys: %v", err)
+	}
+
+	// Verify the key has the latest value
+	value, err := engine.Get(key)
+	if err != nil {
+		t.Fatalf("Failed to get key after flush: %v", err)
+	}
+	if !bytes.Equal(value, []byte("44")) {
+		t.Errorf("Got incorrect value after flush. Expected: %s, Got: %s", "44", string(value))
+	}
+}
+
 func TestEngine_MemTableFlush(t *testing.T) {
 	dir, engine, cleanup := setupTest(t)
 	defer cleanup()
--- a/pkg/engine/iterator.go
+++ b/pkg/engine/iterator.go
@ -440,41 +440,23 @@ func (c *chainedIterator) SeekToFirst() {
 		iter.SeekToFirst()
 	}

-	// Maps to track the best (newest) source for each key
-	keyToSource := make(map[string]int) // Key -> best source index
-	keyToLevel := make(map[string]int)  // Key -> best source level (lower is better)
-	keyToPos := make(map[string][]byte) // Key -> binary key value (for ordering)
-
-	// First pass: Find the best source for each key
+	// Find the iterator with the smallest key from the newest source
+	c.current = -1
+	
+	// Find the smallest valid key
 	for i, iter := range c.iterators {
 		if !iter.Valid() {
 			continue
 		}
-
-		// Use string key for map
-		keyStr := string(iter.Key())
-		keyBytes := iter.Key()
-		level := c.sources[i].GetLevel()
-
-		// If we haven't seen this key yet, or this source is newer
-		bestLevel, seen := keyToLevel[keyStr]
-		if !seen || level < bestLevel {
-			keyToSource[keyStr] = i
-			keyToLevel[keyStr] = level
-			keyToPos[keyStr] = keyBytes
-		}
-	}
-
-	// Find the smallest key in our deduplicated set
-	c.current = -1
-	var smallestKey []byte
-
-	for keyStr, sourceIdx := range keyToSource {
-		keyBytes := keyToPos[keyStr]
-
-		if c.current == -1 || bytes.Compare(keyBytes, smallestKey) < 0 {
-			c.current = sourceIdx
-			smallestKey = keyBytes
+		
+		// If we haven't found a key yet, or this key is smaller than the current smallest
+		if c.current == -1 || bytes.Compare(iter.Key(), c.iterators[c.current].Key()) < 0 {
+			c.current = i
+		} else if bytes.Equal(iter.Key(), c.iterators[c.current].Key()) {
+			// If keys are equal, prefer the newer source (lower level)
+			if c.sources[i].GetLevel() < c.sources[c.current].GetLevel() {
+				c.current = i
+			}
 		}
 	}
 }
@ -515,41 +497,23 @@ func (c *chainedIterator) Seek(target []byte) bool {
 		iter.Seek(target)
 	}

-	// Maps to track the best (newest) source for each key
-	keyToSource := make(map[string]int) // Key -> best source index
-	keyToLevel := make(map[string]int)  // Key -> best source level (lower is better)
-	keyToPos := make(map[string][]byte) // Key -> binary key value (for ordering)
-
-	// First pass: Find the best source for each key
+	// Find the iterator with the smallest key from the newest source
+	c.current = -1
+	
+	// Find the smallest valid key
 	for i, iter := range c.iterators {
 		if !iter.Valid() {
 			continue
 		}
-
-		// Use string key for map
-		keyStr := string(iter.Key())
-		keyBytes := iter.Key()
-		level := c.sources[i].GetLevel()
-
-		// If we haven't seen this key yet, or this source is newer
-		bestLevel, seen := keyToLevel[keyStr]
-		if !seen || level < bestLevel {
-			keyToSource[keyStr] = i
-			keyToLevel[keyStr] = level
-			keyToPos[keyStr] = keyBytes
-		}
-	}
-
-	// Find the smallest key in our deduplicated set
-	c.current = -1
-	var smallestKey []byte
-
-	for keyStr, sourceIdx := range keyToSource {
-		keyBytes := keyToPos[keyStr]
-
-		if c.current == -1 || bytes.Compare(keyBytes, smallestKey) < 0 {
-			c.current = sourceIdx
-			smallestKey = keyBytes
+		
+		// If we haven't found a key yet, or this key is smaller than the current smallest
+		if c.current == -1 || bytes.Compare(iter.Key(), c.iterators[c.current].Key()) < 0 {
+			c.current = i
+		} else if bytes.Equal(iter.Key(), c.iterators[c.current].Key()) {
+			// If keys are equal, prefer the newer source (lower level)
+			if c.sources[i].GetLevel() < c.sources[c.current].GetLevel() {
+				c.current = i
+			}
 		}
 	}

@ -571,46 +535,28 @@ func (c *chainedIterator) Next() bool {
 		}
 	}

-	// Maps to track the best (newest) source for each key
-	keyToSource := make(map[string]int) // Key -> best source index
-	keyToLevel := make(map[string]int)  // Key -> best source level (lower is better)
-	keyToPos := make(map[string][]byte) // Key -> binary key value (for ordering)
-
-	// First pass: Find the best source for each key
+	// Find the iterator with the smallest key from the newest source
+	c.current = -1
+	
+	// Find the smallest valid key that is greater than the current key
 	for i, iter := range c.iterators {
 		if !iter.Valid() {
 			continue
 		}
-
-		// Use string key for map
-		keyStr := string(iter.Key())
-		keyBytes := iter.Key()
-		level := c.sources[i].GetLevel()
-
-		// If this key is the same as current, skip it
-		if bytes.Equal(keyBytes, currentKey) {
+		
+		// Skip if the key is the same as the current key (we've already advanced past it)
+		if bytes.Equal(iter.Key(), currentKey) {
 			continue
 		}
-
-		// If we haven't seen this key yet, or this source is newer
-		bestLevel, seen := keyToLevel[keyStr]
-		if !seen || level < bestLevel {
-			keyToSource[keyStr] = i
-			keyToLevel[keyStr] = level
-			keyToPos[keyStr] = keyBytes
-		}
-	}
-
-	// Find the smallest key in our deduplicated set
-	c.current = -1
-	var smallestKey []byte
-
-	for keyStr, sourceIdx := range keyToSource {
-		keyBytes := keyToPos[keyStr]
-
-		if c.current == -1 || bytes.Compare(keyBytes, smallestKey) < 0 {
-			c.current = sourceIdx
-			smallestKey = keyBytes
+		
+		// If we haven't found a key yet, or this key is smaller than the current smallest
+		if c.current == -1 || bytes.Compare(iter.Key(), c.iterators[c.current].Key()) < 0 {
+			c.current = i
+		} else if bytes.Equal(iter.Key(), c.iterators[c.current].Key()) {
+			// If keys are equal, prefer the newer source (lower level)
+			if c.sources[i].GetLevel() < c.sources[c.current].GetLevel() {
+				c.current = i
+			}
 		}
 	}
Author	SHA1	Message	Date
Jeremy Tregunna	001934e7b5	refactor: optimize two pass deduping into a single pass across the codebase All checks were successful Go Tests / Run Tests (1.24.2) (push) Successful in 9m38s Details	2025-04-21 16:27:54 -06:00
Jeremy Tregunna	1a0de922af	refactor: use memtable sorted order iterator to write and dedup in a single pass	2025-04-21 14:53:03 -06:00
Jeremy Tregunna	4e813aa3fd	fix: flushing a memtable dedups keys as its processing them before creating the SSTable	2025-04-21 14:53:03 -06:00