package sstable

import (
	"encoding/binary"
	"fmt"
	"os"
	"path/filepath"
	"testing"

	"git.canoozie.net/jer/go-storage/pkg/sstable/block"
)

// decodeIndexValueForTest splits an index block value into the data block
// offset (first 8 bytes, little-endian) and the data block size (next 4
// bytes, little-endian). ok is false when value is shorter than 12 bytes, so
// callers can report a malformed entry instead of panicking on a slice
// expression that is out of range.
func decodeIndexValueForTest(value []byte) (offset uint64, size uint32, ok bool) {
	if len(value) < 12 {
		return 0, 0, false
	}
	return binary.LittleEndian.Uint64(value[:8]), binary.LittleEndian.Uint32(value[8:12]), true
}

// TestSSTableWriteRead writes 100 sequential key/value pairs through a Writer,
// reopens the file with OpenReader and checks that:
//   - the reader reports the expected number of entries,
//   - every tenth key can be fetched with Get and has the expected value,
//   - Get on a key that was never written returns ErrNotFound.
//
// Along the way it logs the reader metadata and the contents of the first few
// index entries and data blocks as a debugging aid.
func TestSSTableWriteRead(t *testing.T) {
	t.Log("===== Starting TestSSTableWriteRead =====")
	defer t.Log("===== TestSSTableWriteRead Done =====")

	// Create a temporary directory for the test
	tempDir := t.TempDir()
	sstablePath := filepath.Join(tempDir, "test.sst")

	// Create a new SSTable writer
	writer, err := NewWriter(sstablePath)
	if err != nil {
		t.Fatalf("Failed to create SSTable writer: %v", err)
	}

	// Add some key-value pairs
	numEntries := 100
	keyValues := make(map[string]string, numEntries)
	for i := 0; i < numEntries; i++ {
		key := fmt.Sprintf("key%05d", i)
		value := fmt.Sprintf("value%05d", i)
		keyValues[key] = value

		if err := writer.Add([]byte(key), []byte(value)); err != nil {
			t.Fatalf("Failed to add entry: %v", err)
		}
	}

	// Finish writing
	if err := writer.Finish(); err != nil {
		t.Fatalf("Failed to finish SSTable: %v", err)
	}

	// Open the SSTable for reading
	reader, err := OpenReader(sstablePath)
	if err != nil {
		t.Fatalf("Failed to open SSTable: %v", err)
	}
	defer reader.Close()

	// Verify the number of entries
	if reader.numEntries != uint32(numEntries) {
		t.Errorf("Expected %d entries, got %d", numEntries, reader.numEntries)
	}

	// Print file information
	t.Logf("SSTable file size: %d bytes", reader.fileSize)
	t.Logf("Index offset: %d", reader.indexOffset)
	t.Logf("Index size: %d", reader.indexSize)
	t.Logf("Entries in table: %d", reader.numEntries)

	// Check what's in the index
	indexIter := reader.indexBlock.Iterator()
	t.Log("Index entries:")
	count := 0
	for indexIter.SeekToFirst(); indexIter.Valid(); indexIter.Next() {
		count++
		if count > 10 {
			// Only the first 10 entries are dumped; the rest are just counted.
			continue
		}

		blockOffset, blockSize, ok := decodeIndexValueForTest(indexIter.Value())
		if !ok {
			t.Errorf("Malformed index value for key %s: got %d bytes, want at least 12",
				string(indexIter.Key()), len(indexIter.Value()))
			continue
		}
		t.Logf(" Index key: %s, block offset: %d, block size: %d",
			string(indexIter.Key()), blockOffset, blockSize)

		// Read the block and see what keys it contains. This is diagnostic
		// output only, so failures are logged rather than failing the test.
		blockData := make([]byte, blockSize)
		if _, err := reader.file.ReadAt(blockData, int64(blockOffset)); err != nil {
			t.Logf(" Could not read block at offset %d: %v", blockOffset, err)
			continue
		}
		blockReader, err := block.NewReader(blockData)
		if err != nil {
			t.Logf(" Could not parse block at offset %d: %v", blockOffset, err)
			continue
		}

		blockIter := blockReader.Iterator()
		t.Log(" Block contents:")
		keysInBlock := 0
		for blockIter.SeekToFirst(); blockIter.Valid() && keysInBlock < 10; blockIter.Next() {
			t.Logf(" Key: %s, Value: %s", string(blockIter.Key()), string(blockIter.Value()))
			keysInBlock++
		}
		// The iterator is still valid only if the loop stopped because of the
		// 10-key cap, i.e. there really are more keys in this block.
		if blockIter.Valid() {
			t.Logf(" ... and more keys")
		}
	}
	t.Logf("Total index entries: %d", count)

	// Read some keys
	for i := 0; i < numEntries; i += 10 {
		key := fmt.Sprintf("key%05d", i)
		expectedValue := keyValues[key]

		value, err := reader.Get([]byte(key))
		if err != nil {
			t.Errorf("Failed to get key %s: %v", key, err)
			continue
		}

		if string(value) != expectedValue {
			t.Errorf("Value mismatch for key %s: expected %s, got %s", key, expectedValue, value)
		}
	}

	// Try to read a non-existent key
	if _, err := reader.Get([]byte("nonexistent")); err != ErrNotFound {
		t.Errorf("Expected ErrNotFound for non-existent key, got: %v", err)
	}
}

// TestSSTableIterator writes 100 sequential key/value pairs, reopens the table
// and checks that a full scan with reader.NewIterator visits every written key
// with the expected value, that the iterator reports no error, and that Seek
// to an existing key lands on a key that was written.
//
// The test also walks the index block directly, reading and parsing every data
// block it points to, and logs extensive diagnostics about block layout.
func TestSSTableIterator(t *testing.T) {
	t.Log("===== Starting TestSSTableIterator =====")
	defer t.Log("===== TestSSTableIterator Done =====")

	// Create a temporary directory for the test. t.TempDir already hands out a
	// fresh, unique directory, so no manual cleanup/recreation is needed.
	tempDir := t.TempDir()
	sstablePath := filepath.Join(tempDir, "test-iterator.sst")

	// Create a new SSTable writer
	writer, err := NewWriter(sstablePath)
	if err != nil {
		t.Fatalf("Failed to create SSTable writer: %v", err)
	}

	// Add some key-value pairs
	numEntries := 100
	orderedKeys := make([]string, 0, numEntries)
	keyValues := make(map[string]string, numEntries)
	for i := 0; i < numEntries; i++ {
		key := fmt.Sprintf("key%05d", i)
		value := fmt.Sprintf("value%05d", i)
		orderedKeys = append(orderedKeys, key)
		keyValues[key] = value

		if err := writer.Add([]byte(key), []byte(value)); err != nil {
			t.Fatalf("Failed to add entry: %v", err)
		}
	}

	// Finish writing
	if err := writer.Finish(); err != nil {
		t.Fatalf("Failed to finish SSTable: %v", err)
	}

	// Print basic file info before opening
	fileInfo, err := os.Stat(sstablePath)
	if err != nil {
		t.Fatalf("Failed to stat SSTable file: %v", err)
	}
	t.Logf("DEBUG: SSTable file size before opening: %d bytes", fileInfo.Size())

	// Open the SSTable for reading
	reader, err := OpenReader(sstablePath)
	if err != nil {
		t.Fatalf("Failed to open SSTable: %v", err)
	}
	defer reader.Close()

	// Debug basic reader info
	t.Logf("DEBUG: Reader metadata - indexOffset=%d, indexSize=%d, numEntries=%d",
		reader.indexOffset, reader.indexSize, reader.numEntries)

	// Print detailed information about the index
	t.Log("### SSTable Index Details ###")
	indexIter := reader.indexBlock.Iterator()
	indexCount := 0
	t.Log("Index entries (block offsets and sizes):")
	for indexIter.SeekToFirst(); indexIter.Valid(); indexIter.Next() {
		// Count the entry up front so the total stays accurate even when one
		// of the checks below bails out with continue.
		entryNum := indexCount
		indexCount++

		indexKey := string(indexIter.Key())
		blockOffset, blockSize, ok := decodeIndexValueForTest(indexIter.Value())
		if !ok {
			t.Errorf("Malformed index value for entry %d (key=%s): got %d bytes, want at least 12",
				entryNum, indexKey, len(indexIter.Value()))
			continue
		}

		t.Logf(" Index entry %d: key=%s, offset=%d, size=%d",
			entryNum, indexKey, blockOffset, blockSize)

		// Read and verify each data block
		blockData := make([]byte, blockSize)
		if _, err := reader.file.ReadAt(blockData, int64(blockOffset)); err != nil {
			t.Errorf("Failed to read data block at offset %d: %v", blockOffset, err)
			continue
		}

		blockReader, err := block.NewReader(blockData)
		if err != nil {
			t.Errorf("Failed to create block reader for block at offset %d: %v", blockOffset, err)
			continue
		}

		// Count keys in this block
		blockIter := blockReader.Iterator()
		blockKeyCount := 0
		for blockIter.SeekToFirst(); blockIter.Valid(); blockIter.Next() {
			blockKeyCount++
		}

		t.Logf(" Block contains %d keys", blockKeyCount)
	}
	t.Logf("Total index entries: %d", indexCount)

	// Create an iterator
	iter := reader.NewIterator()

	// Verify we can read all keys
	foundKeys := make(map[string]bool, numEntries)
	count := 0

	t.Log("### Testing SSTable Iterator ###")

	// DEBUG: Check if the index iterator is valid before we start
	debugIndexIter := reader.indexBlock.Iterator()
	debugIndexIter.SeekToFirst()
	t.Logf("Index iterator valid before test: %v", debugIndexIter.Valid())

	// Collect each distinct data block offset in index order, remembering the
	// first index key that referenced it, so duplicates are easy to spot.
	firstKeyByOffset := make(map[uint64]string)
	uniqueOffsetsInOrder := make([]uint64, 0, 10)
	for debugIndexIter.SeekToFirst(); debugIndexIter.Valid(); debugIndexIter.Next() {
		indexValue := debugIndexIter.Value()
		if len(indexValue) < 8 {
			continue
		}
		offset := binary.LittleEndian.Uint64(indexValue[:8])

		// Only add if we haven't seen this offset before
		if _, seen := firstKeyByOffset[offset]; !seen {
			firstKeyByOffset[offset] = string(debugIndexIter.Key())
			uniqueOffsetsInOrder = append(uniqueOffsetsInOrder, offset)
		}
	}

	// Log the unique offsets
	t.Log("Unique data block offsets:")
	for i, offset := range uniqueOffsetsInOrder {
		t.Logf(" Block %d: offset=%d, first key=%s", i, offset, firstKeyByOffset[offset])
	}

	// Get the first index entry for debugging
	debugIndexIter.SeekToFirst()
	if debugIndexIter.Valid() {
		if blockOffset, blockSize, ok := decodeIndexValueForTest(debugIndexIter.Value()); ok {
			t.Logf("First index entry points to offset=%d, size=%d", blockOffset, blockSize)
		} else {
			t.Errorf("Malformed first index value: got %d bytes, want at least 12",
				len(debugIndexIter.Value()))
		}
	}

	// inspectSecondBlock looks up the second index entry with a fresh index
	// iterator, reads the data block it points to straight from the file and
	// logs up to five of its keys. It is purely diagnostic apart from
	// reporting read/parse failures as test errors.
	inspectSecondBlock := func() {
		secondEntryIter := reader.indexBlock.Iterator()
		secondEntryIter.SeekToFirst()
		if !secondEntryIter.Next() {
			t.Log("No second entry in index, which is unexpected")
			return
		}
		t.Log("There is a second entry in the index, so we should be able to read more blocks")

		blockOffset, blockSize, ok := decodeIndexValueForTest(secondEntryIter.Value())
		if !ok {
			t.Errorf("Malformed second index value: got %d bytes, want at least 12",
				len(secondEntryIter.Value()))
			return
		}
		t.Logf("Second index entry points to offset=%d, size=%d", blockOffset, blockSize)

		// Try reading the second block directly
		blockData := make([]byte, blockSize)
		if _, err := reader.file.ReadAt(blockData, int64(blockOffset)); err != nil {
			t.Errorf("Failed to read second block: %v", err)
			return
		}
		blockReader, err := block.NewReader(blockData)
		if err != nil {
			t.Errorf("Failed to create reader for second block: %v", err)
			return
		}

		blockIter := blockReader.Iterator()
		blockKeyCount := 0
		t.Log("Keys in second block:")
		for blockIter.SeekToFirst(); blockIter.Valid() && blockKeyCount < 5; blockIter.Next() {
			t.Logf(" Key: %s", string(blockIter.Key()))
			blockKeyCount++
		}
		t.Logf("Found %d keys in second block", blockKeyCount)
	}

	for iter.SeekToFirst(); iter.Valid(); iter.Next() {
		key := string(iter.Key())
		if len(key) == 0 {
			t.Log("Found empty key, skipping")
			continue // Skip empty keys
		}

		value := string(iter.Value())
		count++

		if count <= 20 || count%10 == 0 {
			t.Logf("Found key %d: %s, value: %s", count, key, value)
		}

		expectedValue, ok := keyValues[key]
		if !ok {
			t.Errorf("Found unexpected key: %s", key)
			continue
		}

		if value != expectedValue {
			t.Errorf("Value mismatch for key %s: expected %s, got %s", key, expectedValue, value)
		}

		foundKeys[key] = true

		// Debug: once exactly 10 keys have been read (assumed here to be the
		// whole first block - TODO confirm against the writer's block size),
		// check the state of things before moving to the next block.
		if count == 10 {
			t.Log("### After reading first block (10 keys) ###")
			t.Log("Checking if there are more blocks available...")
			inspectSecondBlock()
		}
	}

	t.Logf("Iterator found %d keys total", count)

	if err := iter.Error(); err != nil {
		t.Errorf("Iterator error: %v", err)
	}

	// Make sure all keys were found
	if len(foundKeys) != numEntries {
		t.Errorf("Expected to find %d keys, got %d", numEntries, len(foundKeys))

		// List keys that were not found (at most 20 individually)
		missingCount := 0
		for _, key := range orderedKeys {
			if foundKeys[key] {
				continue
			}
			if missingCount < 20 {
				t.Errorf("Key not found: %s", key)
			}
			missingCount++
		}

		if missingCount > 20 {
			t.Errorf("... and %d more keys not found", missingCount-20)
		}
	}

	// Test seeking
	iter = reader.NewIterator()
	midKey := "key00050"
	if !iter.Seek([]byte(midKey)) {
		t.Errorf("Failed to seek to %s", midKey)
		return
	}

	// Only require that Seek landed on some key that was actually written.
	key := string(iter.Key())
	if _, ok := keyValues[key]; !ok {
		t.Errorf("Seek to %s returned invalid key: %s", midKey, key)
	}
}

// TestSSTableCorruption writes a valid 100-entry table, overwrites the last
// 8 bytes of the file with 0xFF and expects OpenReader to reject the result.
func TestSSTableCorruption(t *testing.T) {
	// Create a temporary directory for the test
	tempDir := t.TempDir()
	sstablePath := filepath.Join(tempDir, "test.sst")

	// Create a new SSTable writer
	writer, err := NewWriter(sstablePath)
	if err != nil {
		t.Fatalf("Failed to create SSTable writer: %v", err)
	}

	// Add some key-value pairs
	for i := 0; i < 100; i++ {
		key := []byte(fmt.Sprintf("key%05d", i))
		value := []byte(fmt.Sprintf("value%05d", i))

		if err := writer.Add(key, value); err != nil {
			t.Fatalf("Failed to add entry: %v", err)
		}
	}

	// Finish writing
	if err := writer.Finish(); err != nil {
		t.Fatalf("Failed to finish SSTable: %v", err)
	}

	// Corrupt the file
	file, err := os.OpenFile(sstablePath, os.O_RDWR, 0)
	if err != nil {
		t.Fatalf("Failed to open file for corruption: %v", err)
	}

	info, err := file.Stat()
	if err != nil {
		file.Close()
		t.Fatalf("Failed to stat file for corruption: %v", err)
	}

	// Write some garbage over the final bytes to corrupt the footer. WriteAt
	// with an explicit offset replaces the deprecated os.SEEK_END seek.
	garbage := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
	if _, err := file.WriteAt(garbage, info.Size()-int64(len(garbage))); err != nil {
		file.Close()
		t.Fatalf("Failed to write garbage: %v", err)
	}
	if err := file.Close(); err != nil {
		t.Fatalf("Failed to close file after corruption: %v", err)
	}

	// Try to open the corrupted file
	reader, err := OpenReader(sstablePath)
	if err == nil {
		// Do not leak the handle when the corruption unexpectedly goes unnoticed.
		reader.Close()
		t.Errorf("Expected error when opening corrupted file, but got none")
	}
}

// TestSSTableAbort adds a few entries to a Writer, calls Abort and verifies
// that neither the writer's temporary file nor the final SSTable path exists
// afterwards.
func TestSSTableAbort(t *testing.T) {
	// Create a temporary directory for the test
	tempDir := t.TempDir()
	sstablePath := filepath.Join(tempDir, "test.sst")

	// Create a new SSTable writer
	writer, err := NewWriter(sstablePath)
	if err != nil {
		t.Fatalf("Failed to create SSTable writer: %v", err)
	}

	// Add some key-value pairs
	for i := 0; i < 10; i++ {
		key := []byte(fmt.Sprintf("key%05d", i))
		value := []byte(fmt.Sprintf("value%05d", i))
		if err := writer.Add(key, value); err != nil {
			t.Fatalf("Failed to add entry: %v", err)
		}
	}

	// Get the temp file path
	tmpPath := writer.tmpPath

	// Abort writing
	if err := writer.Abort(); err != nil {
		t.Fatalf("Failed to abort SSTable: %v", err)
	}

	// Verify that the temp file has been deleted
	if _, err := os.Stat(tmpPath); !os.IsNotExist(err) {
		t.Errorf("Temp file %s still exists after abort", tmpPath)
	}

	// Verify that the final file doesn't exist
	if _, err := os.Stat(sstablePath); !os.IsNotExist(err) {
		t.Errorf("Final file %s exists after abort", sstablePath)
	}
}