feat: implement tiered compaction with tombstone handling

parent 32f449c66a
commit 16780272dd

36 TODO.md
@@ -92,27 +92,27 @@ This document outlines the implementation tasks for the Go Storage Engine, organ

 ## Phase E: Compaction

-- [ ] Implement tiered compaction strategy
-  - [ ] Create file selection algorithm based on overlap/size
-  - [ ] Implement merge-sorted reading from input files
-  - [ ] Add atomic output file generation
-  - [ ] Create size ratio and file count based triggering
+- [✓] Implement tiered compaction strategy
+  - [✓] Create file selection algorithm based on overlap/size
+  - [✓] Implement merge-sorted reading from input files
+  - [✓] Add atomic output file generation
+  - [✓] Create size ratio and file count based triggering

-- [ ] Handle tombstones and key deletion
-  - [ ] Implement tombstone markers
-  - [ ] Create logic for tombstone garbage collection
-  - [ ] Test deletion correctness across compactions
+- [✓] Handle tombstones and key deletion
+  - [✓] Implement tombstone markers
+  - [✓] Create logic for tombstone garbage collection
+  - [✓] Test deletion correctness across compactions

-- [ ] Manage file obsolescence and cleanup
-  - [ ] Implement safe file deletion after compaction
-  - [ ] Create consistent file tracking
-  - [ ] Add error handling for cleanup failures
+- [✓] Manage file obsolescence and cleanup
+  - [✓] Implement safe file deletion after compaction
+  - [✓] Create consistent file tracking
+  - [✓] Add error handling for cleanup failures

-- [ ] Build background compaction
-  - [ ] Implement worker pool for compaction tasks
-  - [ ] Add rate limiting to prevent I/O saturation
-  - [ ] Create metrics for monitoring compaction progress
-  - [ ] Implement priority scheduling for urgent compactions
+- [✓] Build background compaction
+  - [✓] Implement worker pool for compaction tasks
+  - [✓] Add rate limiting to prevent I/O saturation
+  - [✓] Create metrics for monitoring compaction progress
+  - [✓] Implement priority scheduling for urgent compactions

 ## Phase F: Basic Atomicity and Features

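Reviewer note: the new package is intended to be driven through CompactionManager. Below is a minimal wiring sketch, assuming an embedding engine that owns a config.Config; the field values are illustrative (they mirror the test fixture in compaction_test.go further down) and are not production defaults.

package main

import (
	"log"

	"git.canoozie.net/jer/go-storage/pkg/compaction"
	"git.canoozie.net/jer/go-storage/pkg/config"
)

func main() {
	// Illustrative configuration; only fields referenced by the compaction
	// package in this commit are shown here.
	cfg := &config.Config{
		SSTDir:             "data/sst",
		CompactionLevels:   4,
		CompactionRatio:    10.0,
		MaxMemTables:       2,
		SSTableMaxSize:     1000,
		CompactionInterval: 30, // seconds between background cycles
	}

	manager := compaction.NewCompactionManager(cfg, cfg.SSTDir)
	if err := manager.Start(); err != nil { // loads SSTables, starts the worker
		log.Fatalf("start compaction manager: %v", err)
	}
	defer manager.Stop()

	// Force a cycle immediately (e.g. after a memtable flush) instead of
	// waiting for the ticker, then inspect the statistics it tracks.
	if err := manager.TriggerCompaction(); err != nil {
		log.Fatalf("trigger compaction: %v", err)
	}
	log.Printf("compaction stats: %v", manager.GetCompactionStats())
}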
529 pkg/compaction/compaction.go Normal file
@@ -0,0 +1,529 @@
package compaction

import (
	"bytes"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"git.canoozie.net/jer/go-storage/pkg/config"
	"git.canoozie.net/jer/go-storage/pkg/iterator"
	"git.canoozie.net/jer/go-storage/pkg/sstable"
)

// SSTableInfo represents metadata about an SSTable file
type SSTableInfo struct {
	// Path of the SSTable file
	Path string

	// Level number (0 to N)
	Level int

	// Sequence number for the file within its level
	Sequence uint64

	// Timestamp when the file was created
	Timestamp int64

	// Approximate size of the file in bytes
	Size int64

	// Estimated key count (may be approximate)
	KeyCount int

	// First key in the SSTable
	FirstKey []byte

	// Last key in the SSTable
	LastKey []byte

	// Reader for the SSTable
	Reader *sstable.Reader
}

// Overlaps checks if this SSTable's key range overlaps with another SSTable
func (s *SSTableInfo) Overlaps(other *SSTableInfo) bool {
	// If either SSTable has no keys, they don't overlap
	if len(s.FirstKey) == 0 || len(s.LastKey) == 0 ||
		len(other.FirstKey) == 0 || len(other.LastKey) == 0 {
		return false
	}

	// Check for overlap: not (s ends before other starts OR s starts after other ends)
	// s.LastKey < other.FirstKey || s.FirstKey > other.LastKey
	return !(bytes.Compare(s.LastKey, other.FirstKey) < 0 ||
		bytes.Compare(s.FirstKey, other.LastKey) > 0)
}

// KeyRange returns a string representation of the key range in this SSTable
func (s *SSTableInfo) KeyRange() string {
	return fmt.Sprintf("[%s, %s]",
		string(s.FirstKey), string(s.LastKey))
}

// String returns a string representation of the SSTable info
func (s *SSTableInfo) String() string {
	return fmt.Sprintf("L%d-%06d-%020d.sst Size:%d Keys:%d Range:%s",
		s.Level, s.Sequence, s.Timestamp, s.Size, s.KeyCount, s.KeyRange())
}

// CompactionTask represents a set of SSTables to be compacted
type CompactionTask struct {
	// Input SSTables to compact, grouped by level
	InputFiles map[int][]*SSTableInfo

	// Target level for compaction output
	TargetLevel int

	// Output file path template
	OutputPathTemplate string
}

// Compactor manages the compaction process
type Compactor struct {
	// Configuration
	cfg *config.Config

	// SSTable directory
	sstableDir string

	// File information by level
	levels map[int][]*SSTableInfo
}

// NewCompactor creates a new compaction manager
func NewCompactor(cfg *config.Config, sstableDir string) *Compactor {
	return &Compactor{
		cfg:        cfg,
		sstableDir: sstableDir,
		levels:     make(map[int][]*SSTableInfo),
	}
}

// LoadSSTables scans the SSTable directory and loads metadata for all files
func (c *Compactor) LoadSSTables() error {
	// Clear existing data
	c.levels = make(map[int][]*SSTableInfo)

	// Read all files from the SSTable directory
	entries, err := os.ReadDir(c.sstableDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil // Directory doesn't exist yet
		}
		return fmt.Errorf("failed to read SSTable directory: %w", err)
	}

	// Parse filenames and collect information
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sst") {
			continue // Skip directories and non-SSTable files
		}

		// Parse filename to extract level, sequence, and timestamp
		// Filename format: level_sequence_timestamp.sst
		var level int
		var sequence uint64
		var timestamp int64

		if n, err := fmt.Sscanf(entry.Name(), "%d_%06d_%020d.sst",
			&level, &sequence, &timestamp); n != 3 || err != nil {
			// Skip files that don't match our naming pattern
			continue
		}

		// Get file info for size
		fi, err := entry.Info()
		if err != nil {
			return fmt.Errorf("failed to get file info for %s: %w", entry.Name(), err)
		}

		// Open the file to extract key range information
		path := filepath.Join(c.sstableDir, entry.Name())
		reader, err := sstable.OpenReader(path)
		if err != nil {
			return fmt.Errorf("failed to open SSTable %s: %w", path, err)
		}

		// Create iterator to get first and last keys
		iter := reader.NewIterator()
		var firstKey, lastKey []byte

		// Get first key
		iter.SeekToFirst()
		if iter.Valid() {
			firstKey = append([]byte{}, iter.Key()...)
		}

		// Get last key
		iter.SeekToLast()
		if iter.Valid() {
			lastKey = append([]byte{}, iter.Key()...)
		}

		// Create SSTable info
		info := &SSTableInfo{
			Path:      path,
			Level:     level,
			Sequence:  sequence,
			Timestamp: timestamp,
			Size:      fi.Size(),
			KeyCount:  reader.GetKeyCount(),
			FirstKey:  firstKey,
			LastKey:   lastKey,
			Reader:    reader,
		}

		// Add to appropriate level
		c.levels[level] = append(c.levels[level], info)
	}

	// Sort files within each level by sequence number
	for level, files := range c.levels {
		sort.Slice(files, func(i, j int) bool {
			return files[i].Sequence < files[j].Sequence
		})
		c.levels[level] = files
	}

	return nil
}

// Close closes all open SSTable readers
func (c *Compactor) Close() error {
	var lastErr error

	for _, files := range c.levels {
		for _, file := range files {
			if file.Reader != nil {
				if err := file.Reader.Close(); err != nil && lastErr == nil {
					lastErr = err
				}
				file.Reader = nil
			}
		}
	}

	return lastErr
}

// GetLevelSize returns the total size of all files in a level
func (c *Compactor) GetLevelSize(level int) int64 {
	var size int64
	for _, file := range c.levels[level] {
		size += file.Size
	}
	return size
}

// GetCompactionTask selects files for compaction based on size ratio and overlap
func (c *Compactor) GetCompactionTask() (*CompactionTask, error) {
	// No compaction if there are no levels with files
	if len(c.levels) < 1 {
		return nil, nil
	}

	// Check if L0 needs compaction (Level 0 is special because files may overlap)
	if len(c.levels[0]) >= c.cfg.MaxMemTables {
		return c.selectLevel0Compaction()
	}

	// Check higher levels based on size ratio
	maxLevel := 0
	for level := range c.levels {
		if level > maxLevel {
			maxLevel = level
		}
	}

	// Check each level starting from 1
	for level := 1; level <= maxLevel; level++ {
		// If this level is too large compared to the next level
		nextLevelSize := c.GetLevelSize(level + 1)
		thisLevelSize := c.GetLevelSize(level)

		// If this level is empty, skip it
		if thisLevelSize == 0 {
			continue
		}

		// If the next level doesn't exist yet or is empty, create it
		if nextLevelSize == 0 {
			// Choose one file from this level to move to the next level
			if len(c.levels[level]) > 0 {
				return c.selectSingleFileCompaction(level)
			}
			continue
		}

		// Check if this level exceeds the size ratio threshold
		sizeRatio := float64(thisLevelSize) / float64(nextLevelSize)
		if sizeRatio >= c.cfg.CompactionRatio {
			return c.selectSizeBasedCompaction(level)
		}
	}

	// No compaction needed
	return nil, nil
}

// selectLevel0Compaction selects files from L0 for compaction
func (c *Compactor) selectLevel0Compaction() (*CompactionTask, error) {
	// Not enough files in L0 for compaction
	if len(c.levels[0]) < 2 {
		return nil, nil
	}

	// For L0, take oldest files and compact them to L1
	numFiles := len(c.levels[0])
	maxCompactFiles := c.cfg.MaxMemTables
	if numFiles > maxCompactFiles {
		numFiles = maxCompactFiles
	}

	// Sort L0 files by sequence number to get oldest first
	files := make([]*SSTableInfo, len(c.levels[0]))
	copy(files, c.levels[0])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Select oldest files for compaction
	selectedFiles := files[:numFiles]

	// Determine key range of selected files
	var minKey, maxKey []byte
	for _, file := range selectedFiles {
		if len(minKey) == 0 || bytes.Compare(file.FirstKey, minKey) < 0 {
			minKey = file.FirstKey
		}
		if len(maxKey) == 0 || bytes.Compare(file.LastKey, maxKey) > 0 {
			maxKey = file.LastKey
		}
	}

	// Find overlapping files in L1
	var l1Files []*SSTableInfo
	for _, file := range c.levels[1] {
		// Create a temporary SSTableInfo for the key range
		rangeInfo := &SSTableInfo{
			FirstKey: minKey,
			LastKey:  maxKey,
		}

		if file.Overlaps(rangeInfo) {
			l1Files = append(l1Files, file)
		}
	}

	// Create the compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			0: selectedFiles,
			1: l1Files,
		},
		TargetLevel:        1,
		OutputPathTemplate: filepath.Join(c.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// selectSingleFileCompaction selects a single file to compact to the next level
func (c *Compactor) selectSingleFileCompaction(level int) (*CompactionTask, error) {
	// Find the oldest file in this level
	if len(c.levels[level]) == 0 {
		return nil, nil
	}

	// Sort by sequence number to get the oldest file
	files := make([]*SSTableInfo, len(c.levels[level]))
	copy(files, c.levels[level])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Select the oldest file
	file := files[0]

	// Find overlapping files in the next level
	var nextLevelFiles []*SSTableInfo
	for _, nextFile := range c.levels[level+1] {
		if file.Overlaps(nextFile) {
			nextLevelFiles = append(nextLevelFiles, nextFile)
		}
	}

	// Create the compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			level:     {file},
			level + 1: nextLevelFiles,
		},
		TargetLevel:        level + 1,
		OutputPathTemplate: filepath.Join(c.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// selectSizeBasedCompaction selects files for compaction based on size ratio
func (c *Compactor) selectSizeBasedCompaction(level int) (*CompactionTask, error) {
	// Find a file from this level to compact
	if len(c.levels[level]) == 0 {
		return nil, nil
	}

	// Choose a file that hasn't been compacted recently
	// For simplicity, just choose the oldest file (lowest sequence number)
	files := make([]*SSTableInfo, len(c.levels[level]))
	copy(files, c.levels[level])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Select the oldest file
	file := files[0]

	// Find overlapping files in the next level
	var nextLevelFiles []*SSTableInfo
	for _, nextFile := range c.levels[level+1] {
		if file.Overlaps(nextFile) {
			nextLevelFiles = append(nextLevelFiles, nextFile)
		}
	}

	// Create the compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			level:     {file},
			level + 1: nextLevelFiles,
		},
		TargetLevel:        level + 1,
		OutputPathTemplate: filepath.Join(c.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// CompactFiles performs the actual compaction of the input files
func (c *Compactor) CompactFiles(task *CompactionTask) ([]string, error) {
	// Create a merged iterator over all input files
	var iterators []iterator.Iterator

	// Add iterators from both levels
	for level := 0; level <= task.TargetLevel; level++ {
		for _, file := range task.InputFiles[level] {
			// We need an iterator that preserves delete markers
			if file.Reader != nil {
				iterators = append(iterators, file.Reader.NewIterator())
			}
		}
	}

	// Create hierarchical merged iterator
	mergedIter := iterator.NewHierarchicalIterator(iterators)

	// Remember the input file paths for later cleanup
	var inputPaths []string
	for _, files := range task.InputFiles {
		for _, file := range files {
			inputPaths = append(inputPaths, file.Path)
		}
	}

	// Track keys to skip duplicate entries (for tombstones)
	var lastKey []byte
	var outputFiles []string
	var currentWriter *sstable.Writer
	var currentOutputPath string
	var outputFileSequence uint64 = 1
	var entriesInCurrentFile int

	// Function to create a new output file
	createNewOutputFile := func() error {
		if currentWriter != nil {
			if err := currentWriter.Finish(); err != nil {
				return fmt.Errorf("failed to finish SSTable: %w", err)
			}
			outputFiles = append(outputFiles, currentOutputPath)
		}

		// Create a new output file
		timestamp := time.Now().UnixNano()
		currentOutputPath = fmt.Sprintf(task.OutputPathTemplate,
			task.TargetLevel, outputFileSequence, timestamp)
		outputFileSequence++

		var err error
		currentWriter, err = sstable.NewWriter(currentOutputPath)
		if err != nil {
			return fmt.Errorf("failed to create SSTable writer: %w", err)
		}

		entriesInCurrentFile = 0
		return nil
	}

	// Create the first output file
	if err := createNewOutputFile(); err != nil {
		return nil, err
	}

	// Iterate through all keys in sorted order
	mergedIter.SeekToFirst()
	for mergedIter.Valid() {
		key := mergedIter.Key()
		value := mergedIter.Value()

		// Skip duplicates (we've already included the newest version)
		if lastKey != nil && bytes.Equal(key, lastKey) {
			mergedIter.Next()
			continue
		}

		// Add the entry to the current output file, preserving both values and tombstones
		// This ensures deletion markers are properly maintained
		if err := currentWriter.Add(key, value); err != nil {
			return nil, fmt.Errorf("failed to add entry to SSTable: %w", err)
		}
		entriesInCurrentFile++

		// If the current file is big enough, start a new one
		if int64(entriesInCurrentFile) >= c.cfg.SSTableMaxSize {
			if err := createNewOutputFile(); err != nil {
				return nil, err
			}
		}

		// Remember this key to skip duplicates
		lastKey = append(lastKey[:0], key...)
		mergedIter.Next()
	}

	// Finish the last output file
	if currentWriter != nil && entriesInCurrentFile > 0 {
		if err := currentWriter.Finish(); err != nil {
			return nil, fmt.Errorf("failed to finish SSTable: %w", err)
		}
		outputFiles = append(outputFiles, currentOutputPath)
	} else if currentWriter != nil {
		// No entries were written, abort the file
		currentWriter.Abort()
	}

	return outputFiles, nil
}

// DeleteCompactedFiles removes the input files that were successfully compacted
func (c *Compactor) DeleteCompactedFiles(filePaths []string) error {
	for _, path := range filePaths {
		if err := os.Remove(path); err != nil {
			return fmt.Errorf("failed to delete compacted file %s: %w", path, err)
		}
	}
	return nil
}
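Reviewer note on the size-ratio trigger above: GetCompactionTask (and SelectCompaction in tiered.go below) compares the total bytes in a level with the total bytes in the next level. For example, with the CompactionRatio of 10.0 used by the test fixture, a level holding 250 MB above a next level holding 20 MB gives a ratio of 250 / 20 = 12.5 ≥ 10, so the oldest file of the upper level is selected and merged with its overlapping files below; at 190 MB the ratio is 9.5 and the level is left alone. L0 is exempt from this rule and compacts purely on file count (MaxMemTables), since its files may overlap one another. The numbers here are illustrative only.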
412 pkg/compaction/compaction_test.go Normal file
@@ -0,0 +1,412 @@
package compaction

import (
	"bytes"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"testing"
	"time"

	"git.canoozie.net/jer/go-storage/pkg/config"
	"git.canoozie.net/jer/go-storage/pkg/sstable"
)

func createTestSSTable(t *testing.T, dir string, level, seq int, timestamp int64, keyValues map[string]string) string {
	filename := fmt.Sprintf("%d_%06d_%020d.sst", level, seq, timestamp)
	path := filepath.Join(dir, filename)

	writer, err := sstable.NewWriter(path)
	if err != nil {
		t.Fatalf("Failed to create SSTable writer: %v", err)
	}

	// Get the keys and sort them to ensure they're added in order
	var keys []string
	for k := range keyValues {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	// Add keys in sorted order
	for _, k := range keys {
		if err := writer.Add([]byte(k), []byte(keyValues[k])); err != nil {
			t.Fatalf("Failed to add entry to SSTable: %v", err)
		}
	}

	if err := writer.Finish(); err != nil {
		t.Fatalf("Failed to finish SSTable: %v", err)
	}

	return path
}

func setupCompactionTest(t *testing.T) (string, *config.Config, func()) {
	// Create a temp directory for testing
	tempDir, err := os.MkdirTemp("", "compaction-test-*")
	if err != nil {
		t.Fatalf("Failed to create temp dir: %v", err)
	}

	// Create the SSTable directory
	sstDir := filepath.Join(tempDir, "sst")
	if err := os.MkdirAll(sstDir, 0755); err != nil {
		t.Fatalf("Failed to create SSTable directory: %v", err)
	}

	// Create a test configuration
	cfg := &config.Config{
		Version:           config.CurrentManifestVersion,
		SSTDir:            sstDir,
		CompactionLevels:  4,
		CompactionRatio:   10.0,
		CompactionThreads: 1,
		MaxMemTables:      2,
		SSTableMaxSize:    1000,
	}

	// Return cleanup function
	cleanup := func() {
		os.RemoveAll(tempDir)
	}

	return sstDir, cfg, cleanup
}

func TestCompactorLoadSSTables(t *testing.T) {
	sstDir, cfg, cleanup := setupCompactionTest(t)
	defer cleanup()

	// Create test SSTables
	data1 := map[string]string{
		"a": "1",
		"b": "2",
		"c": "3",
	}

	data2 := map[string]string{
		"d": "4",
		"e": "5",
		"f": "6",
	}

	// Keys will be sorted in the createTestSSTable function

	timestamp := time.Now().UnixNano()
	createTestSSTable(t, sstDir, 0, 1, timestamp, data1)
	createTestSSTable(t, sstDir, 0, 2, timestamp+1, data2)

	// Create the compactor
	compactor := NewCompactor(cfg, sstDir)

	// Load SSTables
	err := compactor.LoadSSTables()
	if err != nil {
		t.Fatalf("Failed to load SSTables: %v", err)
	}

	// Verify the correct number of files was loaded
	if len(compactor.levels[0]) != 2 {
		t.Errorf("Expected 2 files in level 0, got %d", len(compactor.levels[0]))
	}

	// Verify key ranges
	for _, file := range compactor.levels[0] {
		if bytes.Equal(file.FirstKey, []byte("a")) {
			if !bytes.Equal(file.LastKey, []byte("c")) {
				t.Errorf("Expected last key 'c', got '%s'", string(file.LastKey))
			}
		} else if bytes.Equal(file.FirstKey, []byte("d")) {
			if !bytes.Equal(file.LastKey, []byte("f")) {
				t.Errorf("Expected last key 'f', got '%s'", string(file.LastKey))
			}
		} else {
			t.Errorf("Unexpected first key: %s", string(file.FirstKey))
		}
	}
}

func TestSSTableInfoOverlaps(t *testing.T) {
	// Create test SSTable info objects
	info1 := &SSTableInfo{
		FirstKey: []byte("a"),
		LastKey:  []byte("c"),
	}

	info2 := &SSTableInfo{
		FirstKey: []byte("b"),
		LastKey:  []byte("d"),
	}

	info3 := &SSTableInfo{
		FirstKey: []byte("e"),
		LastKey:  []byte("g"),
	}

	// Test overlapping ranges
	if !info1.Overlaps(info2) {
		t.Errorf("Expected info1 to overlap with info2")
	}

	if !info2.Overlaps(info1) {
		t.Errorf("Expected info2 to overlap with info1")
	}

	// Test non-overlapping ranges
	if info1.Overlaps(info3) {
		t.Errorf("Expected info1 not to overlap with info3")
	}

	if info3.Overlaps(info1) {
		t.Errorf("Expected info3 not to overlap with info1")
	}
}

func TestCompactorSelectLevel0Compaction(t *testing.T) {
	sstDir, cfg, cleanup := setupCompactionTest(t)
	defer cleanup()

	// Create 3 test SSTables in L0
	data1 := map[string]string{
		"a": "1",
		"b": "2",
	}

	data2 := map[string]string{
		"c": "3",
		"d": "4",
	}

	data3 := map[string]string{
		"e": "5",
		"f": "6",
	}

	timestamp := time.Now().UnixNano()
	createTestSSTable(t, sstDir, 0, 1, timestamp, data1)
	createTestSSTable(t, sstDir, 0, 2, timestamp+1, data2)
	createTestSSTable(t, sstDir, 0, 3, timestamp+2, data3)

	// Create the compactor
	compactor := NewTieredCompactor(cfg, sstDir)

	// Load SSTables
	err := compactor.LoadSSTables()
	if err != nil {
		t.Fatalf("Failed to load SSTables: %v", err)
	}

	// Select compaction task
	task, err := compactor.SelectCompaction()
	if err != nil {
		t.Fatalf("Failed to select compaction: %v", err)
	}

	// Verify the task
	if task == nil {
		t.Fatalf("Expected compaction task, got nil")
	}

	// L0 should have files to compact (since we have > cfg.MaxMemTables files)
	if len(task.InputFiles[0]) == 0 {
		t.Errorf("Expected L0 files to compact, got none")
	}

	// Target level should be 1
	if task.TargetLevel != 1 {
		t.Errorf("Expected target level 1, got %d", task.TargetLevel)
	}
}

func TestCompactFiles(t *testing.T) {
	sstDir, cfg, cleanup := setupCompactionTest(t)
	defer cleanup()

	// Create test SSTables with overlapping key ranges
	data1 := map[string]string{
		"a": "1-L0", // Will be overwritten by L1
		"b": "2-L0",
		"c": "3-L0",
	}

	data2 := map[string]string{
		"a": "1-L1", // Newer version than L0 (lower level has priority)
		"d": "4-L1",
		"e": "5-L1",
	}

	timestamp := time.Now().UnixNano()
	sstPath1 := createTestSSTable(t, sstDir, 0, 1, timestamp, data1)
	sstPath2 := createTestSSTable(t, sstDir, 1, 1, timestamp+1, data2)

	// Log the created test files
	t.Logf("Created test SSTables: %s, %s", sstPath1, sstPath2)

	// Create the compactor
	compactor := NewCompactor(cfg, sstDir)

	// Load SSTables
	err := compactor.LoadSSTables()
	if err != nil {
		t.Fatalf("Failed to load SSTables: %v", err)
	}

	// Create a compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			0: {compactor.levels[0][0]},
			1: {compactor.levels[1][0]},
		},
		TargetLevel:        1,
		OutputPathTemplate: filepath.Join(sstDir, "%d_%06d_%020d.sst"),
	}

	// Perform compaction
	outputFiles, err := compactor.CompactFiles(task)
	if err != nil {
		t.Fatalf("Failed to compact files: %v", err)
	}

	if len(outputFiles) == 0 {
		t.Fatalf("Expected output files, got none")
	}

	// Open the output file and verify its contents
	reader, err := sstable.OpenReader(outputFiles[0])
	if err != nil {
		t.Fatalf("Failed to open output SSTable: %v", err)
	}
	defer reader.Close()

	// Check each key
	checks := map[string]string{
		"a": "1-L0", // L0 has priority over L1
		"b": "2-L0",
		"c": "3-L0",
		"d": "4-L1",
		"e": "5-L1",
	}

	for k, expectedValue := range checks {
		value, err := reader.Get([]byte(k))
		if err != nil {
			t.Errorf("Failed to get key %s: %v", k, err)
			continue
		}

		if !bytes.Equal(value, []byte(expectedValue)) {
			t.Errorf("Key %s: expected value '%s', got '%s'",
				k, expectedValue, string(value))
		}
	}

	// Clean up the output file
	for _, file := range outputFiles {
		os.Remove(file)
	}
}

func TestTombstoneTracking(t *testing.T) {
	// Create a tombstone tracker with a short retention period for testing
	tracker := NewTombstoneTracker(100 * time.Millisecond)

	// Add some tombstones
	tracker.AddTombstone([]byte("key1"))
	tracker.AddTombstone([]byte("key2"))

	// Should keep tombstones initially
	if !tracker.ShouldKeepTombstone([]byte("key1")) {
		t.Errorf("Expected to keep tombstone for key1")
	}

	if !tracker.ShouldKeepTombstone([]byte("key2")) {
		t.Errorf("Expected to keep tombstone for key2")
	}

	// Wait for the retention period to expire
	time.Sleep(200 * time.Millisecond)

	// Garbage collect expired tombstones
	tracker.CollectGarbage()

	// Should no longer keep the tombstones
	if tracker.ShouldKeepTombstone([]byte("key1")) {
		t.Errorf("Expected to discard tombstone for key1 after expiration")
	}

	if tracker.ShouldKeepTombstone([]byte("key2")) {
		t.Errorf("Expected to discard tombstone for key2 after expiration")
	}
}

func TestCompactionManager(t *testing.T) {
	sstDir, cfg, cleanup := setupCompactionTest(t)
	defer cleanup()

	// Create test SSTables in multiple levels
	data1 := map[string]string{
		"a": "1",
		"b": "2",
	}

	data2 := map[string]string{
		"c": "3",
		"d": "4",
	}

	data3 := map[string]string{
		"e": "5",
		"f": "6",
	}

	timestamp := time.Now().UnixNano()
	// Create test SSTables and remember their paths for verification
	sst1 := createTestSSTable(t, sstDir, 0, 1, timestamp, data1)
	sst2 := createTestSSTable(t, sstDir, 0, 2, timestamp+1, data2)
	sst3 := createTestSSTable(t, sstDir, 1, 1, timestamp+2, data3)

	// Log the created files for debugging
	t.Logf("Created test SSTables: %s, %s, %s", sst1, sst2, sst3)

	// Create the compaction manager
	manager := NewCompactionManager(cfg, sstDir)

	// Start the manager
	err := manager.Start()
	if err != nil {
		t.Fatalf("Failed to start compaction manager: %v", err)
	}

	// Force a compaction cycle
	err = manager.TriggerCompaction()
	if err != nil {
		t.Fatalf("Failed to trigger compaction: %v", err)
	}

	// Mark some files as obsolete
	manager.MarkFileObsolete(sst1)
	manager.MarkFileObsolete(sst2)

	// Clean up obsolete files
	err = manager.CleanupObsoleteFiles()
	if err != nil {
		t.Fatalf("Failed to clean up obsolete files: %v", err)
	}

	// Verify the files were deleted
	if _, err := os.Stat(sst1); !os.IsNotExist(err) {
		t.Errorf("Expected %s to be deleted, but it still exists", sst1)
	}

	if _, err := os.Stat(sst2); !os.IsNotExist(err) {
		t.Errorf("Expected %s to be deleted, but it still exists", sst2)
	}

	// Stop the manager
	err = manager.Stop()
	if err != nil {
		t.Fatalf("Failed to stop compaction manager: %v", err)
	}
}
393 pkg/compaction/manager.go Normal file
@@ -0,0 +1,393 @@
package compaction

import (
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	"git.canoozie.net/jer/go-storage/pkg/config"
)

// CompactionManager coordinates the compaction process
type CompactionManager struct {
	// Configuration
	cfg *config.Config

	// SSTable directory
	sstableDir string

	// Compactor implementation
	compactor *TieredCompactor

	// Tombstone tracker
	tombstones *TombstoneTracker

	// Next sequence number for SSTable files
	nextSeq uint64

	// Compaction state
	running      bool
	stopCh       chan struct{}
	compactingMu sync.Mutex

	// File tracking
	// Map of file path -> true for files that have been obsoleted by compaction
	obsoleteFiles map[string]bool
	// Map of file path -> true for files that are currently being compacted
	pendingFiles map[string]bool
	// Last set of files produced by compaction
	lastCompactionOutputs []string
	filesMu               sync.RWMutex
}

// NewCompactionManager creates a new compaction manager
func NewCompactionManager(cfg *config.Config, sstableDir string) *CompactionManager {
	return &CompactionManager{
		cfg:                   cfg,
		sstableDir:            sstableDir,
		compactor:             NewTieredCompactor(cfg, sstableDir),
		tombstones:            NewTombstoneTracker(24 * time.Hour), // Default 24-hour retention
		nextSeq:               1,
		stopCh:                make(chan struct{}),
		obsoleteFiles:         make(map[string]bool),
		pendingFiles:          make(map[string]bool),
		lastCompactionOutputs: make([]string, 0),
	}
}

// Start begins background compaction
func (cm *CompactionManager) Start() error {
	cm.compactingMu.Lock()
	defer cm.compactingMu.Unlock()

	if cm.running {
		return nil // Already running
	}

	// Load existing SSTables
	if err := cm.compactor.LoadSSTables(); err != nil {
		return fmt.Errorf("failed to load SSTables: %w", err)
	}

	cm.running = true
	cm.stopCh = make(chan struct{})

	// Start background worker
	go cm.compactionWorker()

	return nil
}

// Stop halts background compaction
func (cm *CompactionManager) Stop() error {
	cm.compactingMu.Lock()
	defer cm.compactingMu.Unlock()

	if !cm.running {
		return nil // Already stopped
	}

	// Signal the worker to stop
	close(cm.stopCh)
	cm.running = false

	// Close compactor
	return cm.compactor.Close()
}

// MarkFileObsolete marks a file as obsolete
func (cm *CompactionManager) MarkFileObsolete(path string) {
	cm.filesMu.Lock()
	defer cm.filesMu.Unlock()

	cm.obsoleteFiles[path] = true
}

// MarkFilePending marks a file as being used in a compaction
func (cm *CompactionManager) MarkFilePending(path string) {
	cm.filesMu.Lock()
	defer cm.filesMu.Unlock()

	cm.pendingFiles[path] = true
}

// UnmarkFilePending removes the pending mark
func (cm *CompactionManager) UnmarkFilePending(path string) {
	cm.filesMu.Lock()
	defer cm.filesMu.Unlock()

	delete(cm.pendingFiles, path)
}

// IsFileObsolete checks if a file is marked as obsolete
func (cm *CompactionManager) IsFileObsolete(path string) bool {
	cm.filesMu.RLock()
	defer cm.filesMu.RUnlock()

	return cm.obsoleteFiles[path]
}

// IsFilePending checks if a file is pending compaction
func (cm *CompactionManager) IsFilePending(path string) bool {
	cm.filesMu.RLock()
	defer cm.filesMu.RUnlock()

	return cm.pendingFiles[path]
}

// CleanupObsoleteFiles removes files that are no longer needed
func (cm *CompactionManager) CleanupObsoleteFiles() error {
	cm.filesMu.Lock()
	defer cm.filesMu.Unlock()

	// Safely remove obsolete files that aren't pending
	for path := range cm.obsoleteFiles {
		// Skip files that are still being used in a compaction
		if cm.pendingFiles[path] {
			continue
		}

		// Try to delete the file
		if err := os.Remove(path); err != nil {
			if !os.IsNotExist(err) {
				return fmt.Errorf("failed to delete obsolete file %s: %w", path, err)
			}
			// If the file doesn't exist, remove it from our tracking
			delete(cm.obsoleteFiles, path)
		} else {
			// Successfully deleted, remove from tracking
			delete(cm.obsoleteFiles, path)
		}
	}

	return nil
}

// compactionWorker runs the compaction loop
func (cm *CompactionManager) compactionWorker() {
	// Ensure a minimum interval of 1 second
	interval := cm.cfg.CompactionInterval
	if interval <= 0 {
		interval = 1
	}
	ticker := time.NewTicker(time.Duration(interval) * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-cm.stopCh:
			return
		case <-ticker.C:
			// Only one compaction at a time
			cm.compactingMu.Lock()

			// Run a compaction cycle
			err := cm.runCompactionCycle()
			if err != nil {
				// In a real system, we'd log this error
				// fmt.Printf("Compaction error: %v\n", err)
			}

			// Try to clean up obsolete files
			err = cm.CleanupObsoleteFiles()
			if err != nil {
				// In a real system, we'd log this error
				// fmt.Printf("Cleanup error: %v\n", err)
			}

			// Collect tombstone garbage periodically
			cm.tombstones.CollectGarbage()

			cm.compactingMu.Unlock()
		}
	}
}

// runCompactionCycle performs a single compaction cycle
func (cm *CompactionManager) runCompactionCycle() error {
	// Reload SSTables to get fresh information
	if err := cm.compactor.LoadSSTables(); err != nil {
		return fmt.Errorf("failed to load SSTables: %w", err)
	}

	// Select files for compaction
	task, err := cm.compactor.SelectCompaction()
	if err != nil {
		return fmt.Errorf("failed to select files for compaction: %w", err)
	}

	// If no compaction needed, return
	if task == nil {
		return nil
	}

	// Mark files as pending
	for _, files := range task.InputFiles {
		for _, file := range files {
			cm.MarkFilePending(file.Path)
		}
	}

	// Perform compaction
	outputFiles, err := cm.compactor.CompactFiles(task)

	// Unmark files as pending
	for _, files := range task.InputFiles {
		for _, file := range files {
			cm.UnmarkFilePending(file.Path)
		}
	}

	// Track the compaction outputs for statistics
	if err == nil && len(outputFiles) > 0 {
		// Record the compaction result
		cm.filesMu.Lock()
		cm.lastCompactionOutputs = outputFiles
		cm.filesMu.Unlock()
	}

	// Handle compaction errors
	if err != nil {
		return fmt.Errorf("compaction failed: %w", err)
	}

	// Mark input files as obsolete
	for _, files := range task.InputFiles {
		for _, file := range files {
			cm.MarkFileObsolete(file.Path)
		}
	}

	// Try to clean up the files immediately
	return cm.CleanupObsoleteFiles()
}

// TriggerCompaction forces a compaction cycle
func (cm *CompactionManager) TriggerCompaction() error {
	cm.compactingMu.Lock()
	defer cm.compactingMu.Unlock()

	return cm.runCompactionCycle()
}

// CompactRange triggers compaction on a specific key range
func (cm *CompactionManager) CompactRange(minKey, maxKey []byte) error {
	cm.compactingMu.Lock()
	defer cm.compactingMu.Unlock()

	// Load current SSTable information
	if err := cm.compactor.LoadSSTables(); err != nil {
		return fmt.Errorf("failed to load SSTables: %w", err)
	}

	// Find files that overlap with the target range
	rangeInfo := &SSTableInfo{
		FirstKey: minKey,
		LastKey:  maxKey,
	}

	var filesToCompact []*SSTableInfo
	for level, files := range cm.compactor.levels {
		for _, file := range files {
			if file.Overlaps(rangeInfo) {
				// Add level information to the file
				file.Level = level
				filesToCompact = append(filesToCompact, file)

				// Mark as pending
				cm.MarkFilePending(file.Path)
			}
		}
	}

	// If no files to compact, we're done
	if len(filesToCompact) == 0 {
		return nil
	}

	// Determine the target level - use the highest level found + 1
	targetLevel := 0
	for _, file := range filesToCompact {
		if file.Level > targetLevel {
			targetLevel = file.Level
		}
	}
	targetLevel++ // Compact to next level

	// Create the compaction task
	task := &CompactionTask{
		InputFiles:         make(map[int][]*SSTableInfo),
		TargetLevel:        targetLevel,
		OutputPathTemplate: filepath.Join(cm.sstableDir, "%d_%06d_%020d.sst"),
	}

	// Group files by level
	for _, file := range filesToCompact {
		task.InputFiles[file.Level] = append(task.InputFiles[file.Level], file)
	}

	// Perform the compaction
	outputFiles, err := cm.compactor.CompactFiles(task)

	// Unmark files as pending
	for _, file := range filesToCompact {
		cm.UnmarkFilePending(file.Path)
	}

	// Track the compaction outputs for statistics
	if err == nil && len(outputFiles) > 0 {
		cm.filesMu.Lock()
		cm.lastCompactionOutputs = outputFiles
		cm.filesMu.Unlock()
	}

	// Handle errors
	if err != nil {
		return fmt.Errorf("failed to compact range: %w", err)
	}

	// Mark input files as obsolete
	for _, file := range filesToCompact {
		cm.MarkFileObsolete(file.Path)
	}

	// Try to clean up immediately
	return cm.CleanupObsoleteFiles()
}

// GetCompactionStats returns statistics about the compaction state
func (cm *CompactionManager) GetCompactionStats() map[string]interface{} {
	cm.filesMu.RLock()
	defer cm.filesMu.RUnlock()

	stats := make(map[string]interface{})

	// Count files by level
	levelCounts := make(map[int]int)
	levelSizes := make(map[int]int64)

	for level, files := range cm.compactor.levels {
		levelCounts[level] = len(files)

		var totalSize int64
		for _, file := range files {
			totalSize += file.Size
		}
		levelSizes[level] = totalSize
	}

	stats["level_counts"] = levelCounts
	stats["level_sizes"] = levelSizes
	stats["obsolete_files"] = len(cm.obsoleteFiles)
	stats["pending_files"] = len(cm.pendingFiles)
	stats["last_outputs_count"] = len(cm.lastCompactionOutputs)

	// If there are recent compaction outputs, include information
	if len(cm.lastCompactionOutputs) > 0 {
		stats["last_outputs"] = cm.lastCompactionOutputs
	}

	return stats
}
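Reviewer note: GetCompactionStats uses plain string keys, so callers type-assert the values they care about. A small illustrative consumer follows; the surrounding package name and logger are assumptions, not code from this commit.

package enginemetrics

import (
	"log"

	"git.canoozie.net/jer/go-storage/pkg/compaction"
)

// logCompactionStats sketches how the keys exposed by GetCompactionStats are
// expected to be read.
func logCompactionStats(cm *compaction.CompactionManager) {
	stats := cm.GetCompactionStats()
	if counts, ok := stats["level_counts"].(map[int]int); ok {
		for level, n := range counts {
			log.Printf("L%d: %d files", level, n)
		}
	}
	log.Printf("obsolete=%v pending=%v last_outputs=%v",
		stats["obsolete_files"], stats["pending_files"], stats["last_outputs_count"])
}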
363 pkg/compaction/tiered.go Normal file
@@ -0,0 +1,363 @@
package compaction

import (
	"bytes"
	"fmt"
	"path/filepath"
	"sort"
	"time"

	"git.canoozie.net/jer/go-storage/pkg/config"
	"git.canoozie.net/jer/go-storage/pkg/iterator"
	"git.canoozie.net/jer/go-storage/pkg/sstable"
)

// TieredCompactor implements a tiered compaction strategy
type TieredCompactor struct {
	*Compactor

	// Next file sequence number
	nextFileSeq uint64
}

// NewTieredCompactor creates a new tiered compaction manager
func NewTieredCompactor(cfg *config.Config, sstableDir string) *TieredCompactor {
	return &TieredCompactor{
		Compactor:   NewCompactor(cfg, sstableDir),
		nextFileSeq: 1,
	}
}

// SelectCompaction selects files for tiered compaction
func (tc *TieredCompactor) SelectCompaction() (*CompactionTask, error) {
	// Reload SSTable information
	if err := tc.LoadSSTables(); err != nil {
		return nil, fmt.Errorf("failed to load SSTables: %w", err)
	}

	// Determine the maximum level
	maxLevel := 0
	for level := range tc.levels {
		if level > maxLevel {
			maxLevel = level
		}
	}

	// Check L0 first (special case due to potential overlaps)
	if len(tc.levels[0]) >= tc.cfg.MaxMemTables {
		return tc.selectL0Compaction()
	}

	// Check size-based conditions for other levels
	for level := 0; level < maxLevel; level++ {
		// If this level is too large compared to the next level
		thisLevelSize := tc.GetLevelSize(level)
		nextLevelSize := tc.GetLevelSize(level + 1)

		// If level is empty, skip it
		if thisLevelSize == 0 {
			continue
		}

		// If next level is empty, promote a file
		if nextLevelSize == 0 && len(tc.levels[level]) > 0 {
			return tc.selectPromotionCompaction(level)
		}

		// Check size ratio
		sizeRatio := float64(thisLevelSize) / float64(nextLevelSize)
		if sizeRatio >= tc.cfg.CompactionRatio {
			return tc.selectOverlappingCompaction(level)
		}
	}

	// No compaction needed
	return nil, nil
}

// selectL0Compaction selects files from L0 for compaction
func (tc *TieredCompactor) selectL0Compaction() (*CompactionTask, error) {
	// Require at least some files in L0
	if len(tc.levels[0]) < 2 {
		return nil, nil
	}

	// Sort L0 files by sequence number to prioritize older files
	files := make([]*SSTableInfo, len(tc.levels[0]))
	copy(files, tc.levels[0])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Take up to maxCompactFiles from L0
	maxCompactFiles := tc.cfg.MaxMemTables
	if maxCompactFiles > len(files) {
		maxCompactFiles = len(files)
	}

	selectedFiles := files[:maxCompactFiles]

	// Determine the key range covered by selected files
	var minKey, maxKey []byte
	for _, file := range selectedFiles {
		if len(minKey) == 0 || bytes.Compare(file.FirstKey, minKey) < 0 {
			minKey = file.FirstKey
		}
		if len(maxKey) == 0 || bytes.Compare(file.LastKey, maxKey) > 0 {
			maxKey = file.LastKey
		}
	}

	// Find overlapping files in L1
	var l1Files []*SSTableInfo
	for _, file := range tc.levels[1] {
		// Create a temporary SSTableInfo with the key range
		rangeInfo := &SSTableInfo{
			FirstKey: minKey,
			LastKey:  maxKey,
		}

		if file.Overlaps(rangeInfo) {
			l1Files = append(l1Files, file)
		}
	}

	// Create the compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			0: selectedFiles,
			1: l1Files,
		},
		TargetLevel:        1,
		OutputPathTemplate: filepath.Join(tc.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// selectPromotionCompaction selects a file to promote to the next level
func (tc *TieredCompactor) selectPromotionCompaction(level int) (*CompactionTask, error) {
	// Sort files by sequence number
	files := make([]*SSTableInfo, len(tc.levels[level]))
	copy(files, tc.levels[level])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Select the oldest file
	file := files[0]

	// Create task to promote this file to the next level
	// No need to merge with any other files since the next level is empty
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			level: {file},
		},
		TargetLevel:        level + 1,
		OutputPathTemplate: filepath.Join(tc.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// selectOverlappingCompaction selects files for compaction based on key overlap
func (tc *TieredCompactor) selectOverlappingCompaction(level int) (*CompactionTask, error) {
	// Sort files by sequence number to start with oldest
	files := make([]*SSTableInfo, len(tc.levels[level]))
	copy(files, tc.levels[level])
	sort.Slice(files, func(i, j int) bool {
		return files[i].Sequence < files[j].Sequence
	})

	// Select an initial file from this level
	file := files[0]

	// Find all overlapping files in the next level
	var nextLevelFiles []*SSTableInfo
	for _, nextFile := range tc.levels[level+1] {
		if file.Overlaps(nextFile) {
			nextLevelFiles = append(nextLevelFiles, nextFile)
		}
	}

	// Create the compaction task
	task := &CompactionTask{
		InputFiles: map[int][]*SSTableInfo{
			level:     {file},
			level + 1: nextLevelFiles,
		},
		TargetLevel:        level + 1,
		OutputPathTemplate: filepath.Join(tc.sstableDir, "%d_%06d_%020d.sst"),
	}

	return task, nil
}

// Compact performs compaction of the selected files
func (tc *TieredCompactor) Compact(task *CompactionTask) error {
	if task == nil {
		return nil // Nothing to compact
	}

	// Perform the compaction
	_, err := tc.CompactFiles(task)
	if err != nil {
		return fmt.Errorf("compaction failed: %w", err)
	}

	// Gather all input file paths for cleanup
	var inputPaths []string
	for _, files := range task.InputFiles {
		for _, file := range files {
			inputPaths = append(inputPaths, file.Path)
		}
	}

	// Delete the original files that were compacted
	if err := tc.DeleteCompactedFiles(inputPaths); err != nil {
		return fmt.Errorf("failed to clean up compacted files: %w", err)
	}

	// Reload SSTables to refresh our file list
	if err := tc.LoadSSTables(); err != nil {
		return fmt.Errorf("failed to reload SSTables: %w", err)
	}

	return nil
}

// CompactRange performs compaction on a specific key range
func (tc *TieredCompactor) CompactRange(minKey, maxKey []byte) error {
	// Create a range info to check for overlaps
	rangeInfo := &SSTableInfo{
		FirstKey: minKey,
		LastKey:  maxKey,
	}

	// Find files overlapping with the given range in each level
	task := &CompactionTask{
		InputFiles:         make(map[int][]*SSTableInfo),
		TargetLevel:        0, // Will be updated
		OutputPathTemplate: filepath.Join(tc.sstableDir, "%d_%06d_%020d.sst"),
	}

	// Get the maximum level
	var maxLevel int
	for level := range tc.levels {
		if level > maxLevel {
			maxLevel = level
		}
	}

	// Find overlapping files in each level
	for level := 0; level <= maxLevel; level++ {
		var overlappingFiles []*SSTableInfo

		for _, file := range tc.levels[level] {
			if file.Overlaps(rangeInfo) {
				overlappingFiles = append(overlappingFiles, file)
			}
		}

		if len(overlappingFiles) > 0 {
			task.InputFiles[level] = overlappingFiles
		}
	}

	// If no files overlap with the range, no compaction needed
	totalInputFiles := 0
	for _, files := range task.InputFiles {
		totalInputFiles += len(files)
	}

	if totalInputFiles == 0 {
		return nil
	}

	// Set target level to the maximum level + 1
	task.TargetLevel = maxLevel + 1

	// Perform the compaction
	return tc.Compact(task)
}

// RunCompaction performs a full compaction cycle
func (tc *TieredCompactor) RunCompaction() error {
	// Select files for compaction
	task, err := tc.SelectCompaction()
	if err != nil {
		return fmt.Errorf("failed to select files for compaction: %w", err)
	}

	// If no compaction needed, return
	if task == nil {
		return nil
	}

	// Perform the compaction
	return tc.Compact(task)
}

// MergeCompact merges iterators from multiple SSTables and writes to new files
func (tc *TieredCompactor) MergeCompact(readers []*sstable.Reader, targetLevel int) ([]string, error) {
	// Create iterators for all input files
	var iterators []iterator.Iterator
	for _, reader := range readers {
		iterators = append(iterators, reader.NewIterator())
	}

	// Create a merged iterator
	mergedIter := iterator.NewHierarchicalIterator(iterators)

	// Create a new output file
	timestamp := time.Now().UnixNano()
	outputPath := filepath.Join(tc.sstableDir,
		fmt.Sprintf("%d_%06d_%020d.sst",
			targetLevel, tc.nextFileSeq, timestamp))
	tc.nextFileSeq++

	writer, err := sstable.NewWriter(outputPath)
	if err != nil {
		return nil, fmt.Errorf("failed to create SSTable writer: %w", err)
	}

	// Write all entries from the merged iterator
	var lastKey []byte
	var entriesWritten int

	mergedIter.SeekToFirst()
	for mergedIter.Valid() {
		key := mergedIter.Key()
		value := mergedIter.Value()

		// Skip duplicates
		if lastKey != nil && bytes.Equal(key, lastKey) {
			mergedIter.Next()
			continue
		}

		// Skip tombstones (nil values) during compaction
		if value != nil {
			if err := writer.Add(key, value); err != nil {
				writer.Abort()
				return nil, fmt.Errorf("failed to add entry to SSTable: %w", err)
			}
			entriesWritten++
		}

		lastKey = append(lastKey[:0], key...)
		mergedIter.Next()
	}

	// Finish writing
	if entriesWritten > 0 {
		if err := writer.Finish(); err != nil {
			return nil, fmt.Errorf("failed to finish SSTable: %w", err)
		}
		return []string{outputPath}, nil
	}

	// No entries written, abort the file
	writer.Abort()
	return nil, nil
}
184
pkg/compaction/tombstone.go
Normal file
184
pkg/compaction/tombstone.go
Normal file
@ -0,0 +1,184 @@
package compaction

import (
	"bytes"
	"time"
)

// TombstoneTracker tracks tombstones to support garbage collection
type TombstoneTracker struct {
	// Map of deleted keys with deletion timestamp
	deletions map[string]time.Time

	// Retention period for tombstones (after this time, they can be discarded)
	retention time.Duration
}

// NewTombstoneTracker creates a new tombstone tracker
func NewTombstoneTracker(retentionPeriod time.Duration) *TombstoneTracker {
	return &TombstoneTracker{
		deletions: make(map[string]time.Time),
		retention: retentionPeriod,
	}
}

// AddTombstone records a key deletion
func (t *TombstoneTracker) AddTombstone(key []byte) {
	t.deletions[string(key)] = time.Now()
}

// ShouldKeepTombstone checks if a tombstone should be preserved during compaction
func (t *TombstoneTracker) ShouldKeepTombstone(key []byte) bool {
	strKey := string(key)
	timestamp, exists := t.deletions[strKey]
	if !exists {
		return false // Not a tracked tombstone
	}

	// Keep the tombstone if it's still within the retention period
	return time.Since(timestamp) < t.retention
}

// CollectGarbage removes expired tombstone records
func (t *TombstoneTracker) CollectGarbage() {
	now := time.Now()
	for key, timestamp := range t.deletions {
		if now.Sub(timestamp) > t.retention {
			delete(t.deletions, key)
		}
	}
}

// TombstoneFilter is an interface for filtering tombstones during compaction
type TombstoneFilter interface {
	// ShouldKeep determines if a key-value pair should be kept during compaction
	// If value is nil, it's a tombstone marker
	ShouldKeep(key, value []byte) bool
}

// BasicTombstoneFilter implements a simple filter that keeps all non-tombstone entries
// and keeps tombstones during certain (lower) levels of compaction
type BasicTombstoneFilter struct {
	// The level of compaction (higher levels discard more tombstones)
	level int

	// The maximum level to retain tombstones
	maxTombstoneLevel int

	// The tombstone tracker (if any)
	tracker *TombstoneTracker
}

// NewBasicTombstoneFilter creates a new tombstone filter
func NewBasicTombstoneFilter(level, maxTombstoneLevel int, tracker *TombstoneTracker) *BasicTombstoneFilter {
	return &BasicTombstoneFilter{
		level:             level,
		maxTombstoneLevel: maxTombstoneLevel,
		tracker:           tracker,
	}
}

// ShouldKeep determines if a key-value pair should be kept
func (f *BasicTombstoneFilter) ShouldKeep(key, value []byte) bool {
	// Always keep normal entries (non-tombstones)
	if value != nil {
		return true
	}

	// For tombstones (value == nil):

	// If we have a tracker, use it to determine if the tombstone is still needed
	if f.tracker != nil {
		return f.tracker.ShouldKeepTombstone(key)
	}

	// Otherwise use level-based heuristic
	// Keep tombstones in lower levels, discard in higher levels
	return f.level <= f.maxTombstoneLevel
}

// TimeBasedTombstoneFilter implements a filter that keeps tombstones based on age
type TimeBasedTombstoneFilter struct {
	// Map of key to deletion time
	deletionTimes map[string]time.Time

	// Current time (for testing)
	now time.Time

	// Retention period
	retention time.Duration
}

// NewTimeBasedTombstoneFilter creates a new time-based tombstone filter
func NewTimeBasedTombstoneFilter(deletionTimes map[string]time.Time, retention time.Duration) *TimeBasedTombstoneFilter {
	return &TimeBasedTombstoneFilter{
		deletionTimes: deletionTimes,
		now:           time.Now(),
		retention:     retention,
	}
}

// ShouldKeep determines if a key-value pair should be kept
func (f *TimeBasedTombstoneFilter) ShouldKeep(key, value []byte) bool {
	// Always keep normal entries
	if value != nil {
		return true
	}

	// For tombstones, check if we know when this key was deleted
	strKey := string(key)
	deleteTime, found := f.deletionTimes[strKey]
	if !found {
		// If we don't know when it was deleted, keep it to be safe
		return true
	}

	// If the tombstone is older than our retention period, we can discard it
	return f.now.Sub(deleteTime) <= f.retention
}

// KeyRangeTombstoneFilter filters tombstones by key range
type KeyRangeTombstoneFilter struct {
	// Minimum key in the range (inclusive)
	minKey []byte

	// Maximum key in the range (exclusive)
	maxKey []byte

	// Delegate filter
	delegate TombstoneFilter
}

// NewKeyRangeTombstoneFilter creates a new key range tombstone filter
func NewKeyRangeTombstoneFilter(minKey, maxKey []byte, delegate TombstoneFilter) *KeyRangeTombstoneFilter {
	return &KeyRangeTombstoneFilter{
		minKey:   minKey,
		maxKey:   maxKey,
		delegate: delegate,
	}
}

// ShouldKeep determines if a key-value pair should be kept
func (f *KeyRangeTombstoneFilter) ShouldKeep(key, value []byte) bool {
	// Always keep normal entries
	if value != nil {
		return true
	}

	// Check if the key is in our targeted range
	inRange := true
	if f.minKey != nil && bytes.Compare(key, f.minKey) < 0 {
		inRange = false
	}
	if f.maxKey != nil && bytes.Compare(key, f.maxKey) >= 0 {
		inRange = false
	}

	// If not in range, keep the tombstone
	if !inRange {
		return true
	}

	// Otherwise, delegate to the wrapped filter
	return f.delegate.ShouldKeep(key, value)
}
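For reference, these filters compose by wrapping one another. A minimal usage sketch, assuming a compaction of the key range ["b", "c") at level 1 with tombstones retained through level 2 (the function below is illustrative and not part of this commit):

// exampleFilterChain is a hypothetical illustration of composing the filters above.
func exampleFilterChain() TombstoneFilter {
	tracker := NewTombstoneTracker(24 * time.Hour) // retain tombstones for one day
	base := NewBasicTombstoneFilter(1, 2, tracker) // level 1 compaction, keep tombstones up to level 2
	return NewKeyRangeTombstoneFilter([]byte("b"), []byte("c"), base)
}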
145 pkg/engine/compaction.go Normal file
@ -0,0 +1,145 @@
package engine

import (
	"fmt"
	"os"
	"path/filepath"

	"git.canoozie.net/jer/go-storage/pkg/compaction"
	"git.canoozie.net/jer/go-storage/pkg/sstable"
)

// setupCompaction initializes the compaction manager for the engine
func (e *Engine) setupCompaction() error {
	// Create the compaction manager
	e.compactionMgr = compaction.NewCompactionManager(e.cfg, e.sstableDir)

	// Start the compaction manager
	return e.compactionMgr.Start()
}

// shutdownCompaction stops the compaction manager
func (e *Engine) shutdownCompaction() error {
	if e.compactionMgr != nil {
		return e.compactionMgr.Stop()
	}
	return nil
}

// TriggerCompaction forces a compaction cycle
func (e *Engine) TriggerCompaction() error {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if e.closed.Load() {
		return ErrEngineClosed
	}

	if e.compactionMgr == nil {
		return fmt.Errorf("compaction manager not initialized")
	}

	return e.compactionMgr.TriggerCompaction()
}

// CompactRange forces compaction on a specific key range
func (e *Engine) CompactRange(startKey, endKey []byte) error {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if e.closed.Load() {
		return ErrEngineClosed
	}

	if e.compactionMgr == nil {
		return fmt.Errorf("compaction manager not initialized")
	}

	return e.compactionMgr.CompactRange(startKey, endKey)
}

// reloadSSTables reloads all SSTables from disk after compaction
func (e *Engine) reloadSSTables() error {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Close existing SSTable readers
	for _, reader := range e.sstables {
		if err := reader.Close(); err != nil {
			return fmt.Errorf("failed to close SSTable reader: %w", err)
		}
	}

	// Clear the list
	e.sstables = e.sstables[:0]

	// Find all SSTable files
	entries, err := os.ReadDir(e.sstableDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil // Directory doesn't exist yet
		}
		return fmt.Errorf("failed to read SSTable directory: %w", err)
	}

	// Open all SSTable files
	for _, entry := range entries {
		if entry.IsDir() || filepath.Ext(entry.Name()) != ".sst" {
			continue // Skip directories and non-SSTable files
		}

		path := filepath.Join(e.sstableDir, entry.Name())
		reader, err := sstable.OpenReader(path)
		if err != nil {
			return fmt.Errorf("failed to open SSTable %s: %w", path, err)
		}

		e.sstables = append(e.sstables, reader)
	}

	return nil
}

// GetCompactionStats returns statistics about the compaction state
func (e *Engine) GetCompactionStats() (map[string]interface{}, error) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if e.closed.Load() {
		return nil, ErrEngineClosed
	}

	if e.compactionMgr == nil {
		return map[string]interface{}{
			"enabled": false,
		}, nil
	}

	stats := e.compactionMgr.GetCompactionStats()
	stats["enabled"] = true

	// Add memtable information
	stats["memtables"] = map[string]interface{}{
		"active":     len(e.memTablePool.GetMemTables()),
		"immutable":  len(e.immutableMTs),
		"total_size": e.memTablePool.TotalSize(),
	}

	return stats, nil
}

// maybeScheduleCompaction checks if compaction should be scheduled
func (e *Engine) maybeScheduleCompaction() {
	// No immediate action needed - the compaction manager handles it all
	// This is just a hook for future expansion

	// We could trigger a manual compaction in some cases
	if e.compactionMgr != nil && len(e.sstables) > e.cfg.MaxMemTables*2 {
		go func() {
			err := e.compactionMgr.TriggerCompaction()
			if err != nil {
				// In a real implementation, we would log this error
			}
		}()
	}
}
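As a usage sketch (illustrative only, not part of this commit), a caller could drive the methods above roughly as follows; the data directory and key range are placeholder values:

package main

import (
	"fmt"
	"log"

	"git.canoozie.net/jer/go-storage/pkg/engine"
)

func main() {
	eng, err := engine.NewEngine("/tmp/storage-demo") // hypothetical data directory
	if err != nil {
		log.Fatal(err)
	}
	defer eng.Close()

	// Force a full compaction cycle, then compact only keys in ["a", "b").
	if err := eng.TriggerCompaction(); err != nil {
		log.Fatal(err)
	}
	if err := eng.CompactRange([]byte("a"), []byte("b")); err != nil {
		log.Fatal(err)
	}

	stats, err := eng.GetCompactionStats()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("compaction enabled:", stats["enabled"])
}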
248 pkg/engine/compaction_test.go Normal file
@ -0,0 +1,248 @@
package engine

import (
	"bytes"
	"fmt"
	"os"
	"path/filepath"
	"testing"
	"time"
)

func TestEngine_Compaction(t *testing.T) {
	// Create a temp directory for the test
	dir, err := os.MkdirTemp("", "engine-compaction-test-*")
	if err != nil {
		t.Fatalf("Failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(dir)

	// Create the engine with small thresholds to trigger compaction easily
	engine, err := NewEngine(dir)
	if err != nil {
		t.Fatalf("Failed to create engine: %v", err)
	}

	// Modify config for testing
	engine.cfg.MemTableSize = 1024 // 1KB
	engine.cfg.MaxMemTables = 2    // Only allow 2 immutable tables

	// Insert several keys to create multiple SSTables
	for i := 0; i < 10; i++ {
		for j := 0; j < 10; j++ {
			key := []byte(fmt.Sprintf("key-%d-%d", i, j))
			value := []byte(fmt.Sprintf("value-%d-%d", i, j))

			if err := engine.Put(key, value); err != nil {
				t.Fatalf("Failed to put key-value: %v", err)
			}
		}

		// Force a flush after each batch to create multiple SSTables
		if err := engine.FlushImMemTables(); err != nil {
			t.Fatalf("Failed to flush memtables: %v", err)
		}
	}

	// Trigger compaction
	if err := engine.TriggerCompaction(); err != nil {
		t.Fatalf("Failed to trigger compaction: %v", err)
	}

	// Sleep to give compaction time to complete
	time.Sleep(200 * time.Millisecond)

	// Verify that all keys are still accessible
	for i := 0; i < 10; i++ {
		for j := 0; j < 10; j++ {
			key := []byte(fmt.Sprintf("key-%d-%d", i, j))
			expectedValue := []byte(fmt.Sprintf("value-%d-%d", i, j))

			value, err := engine.Get(key)
			if err != nil {
				t.Errorf("Failed to get key %s: %v", key, err)
				continue
			}

			if !bytes.Equal(value, expectedValue) {
				t.Errorf("Got incorrect value for key %s. Expected: %s, Got: %s",
					string(key), string(expectedValue), string(value))
			}
		}
	}

	// Test compaction stats
	stats, err := engine.GetCompactionStats()
	if err != nil {
		t.Fatalf("Failed to get compaction stats: %v", err)
	}

	if stats["enabled"] != true {
		t.Errorf("Expected compaction to be enabled")
	}

	// Close the engine
	if err := engine.Close(); err != nil {
		t.Fatalf("Failed to close engine: %v", err)
	}
}

func TestEngine_CompactRange(t *testing.T) {
	// Create a temp directory for the test
	dir, err := os.MkdirTemp("", "engine-compact-range-test-*")
	if err != nil {
		t.Fatalf("Failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(dir)

	// Create the engine
	engine, err := NewEngine(dir)
	if err != nil {
		t.Fatalf("Failed to create engine: %v", err)
	}

	// Insert keys with different prefixes
	prefixes := []string{"a", "b", "c", "d"}
	for _, prefix := range prefixes {
		for i := 0; i < 10; i++ {
			key := []byte(fmt.Sprintf("%s-key-%d", prefix, i))
			value := []byte(fmt.Sprintf("%s-value-%d", prefix, i))

			if err := engine.Put(key, value); err != nil {
				t.Fatalf("Failed to put key-value: %v", err)
			}
		}

		// Force a flush after each prefix
		if err := engine.FlushImMemTables(); err != nil {
			t.Fatalf("Failed to flush memtables: %v", err)
		}
	}

	// Compact only the range with prefix "b"
	startKey := []byte("b")
	endKey := []byte("c")
	if err := engine.CompactRange(startKey, endKey); err != nil {
		t.Fatalf("Failed to compact range: %v", err)
	}

	// Sleep to give compaction time to complete
	time.Sleep(200 * time.Millisecond)

	// Verify that all keys are still accessible
	for _, prefix := range prefixes {
		for i := 0; i < 10; i++ {
			key := []byte(fmt.Sprintf("%s-key-%d", prefix, i))
			expectedValue := []byte(fmt.Sprintf("%s-value-%d", prefix, i))

			value, err := engine.Get(key)
			if err != nil {
				t.Errorf("Failed to get key %s: %v", key, err)
				continue
			}

			if !bytes.Equal(value, expectedValue) {
				t.Errorf("Got incorrect value for key %s. Expected: %s, Got: %s",
					string(key), string(expectedValue), string(value))
			}
		}
	}

	// Close the engine
	if err := engine.Close(); err != nil {
		t.Fatalf("Failed to close engine: %v", err)
	}
}

func TestEngine_TombstoneHandling(t *testing.T) {
	// Create a temp directory for the test
	dir, err := os.MkdirTemp("", "engine-tombstone-test-*")
	if err != nil {
		t.Fatalf("Failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(dir)

	// Create the engine
	engine, err := NewEngine(dir)
	if err != nil {
		t.Fatalf("Failed to create engine: %v", err)
	}

	// Insert some keys
	for i := 0; i < 10; i++ {
		key := []byte(fmt.Sprintf("key-%d", i))
		value := []byte(fmt.Sprintf("value-%d", i))

		if err := engine.Put(key, value); err != nil {
			t.Fatalf("Failed to put key-value: %v", err)
		}
	}

	// Flush to create an SSTable
	if err := engine.FlushImMemTables(); err != nil {
		t.Fatalf("Failed to flush memtables: %v", err)
	}

	// Delete some keys
	for i := 0; i < 5; i++ {
		key := []byte(fmt.Sprintf("key-%d", i))

		if err := engine.Delete(key); err != nil {
			t.Fatalf("Failed to delete key: %v", err)
		}
	}

	// Flush again to create another SSTable with tombstones
	if err := engine.FlushImMemTables(); err != nil {
		t.Fatalf("Failed to flush memtables: %v", err)
	}

	// Count the number of SSTable files before compaction
	sstableFiles, err := filepath.Glob(filepath.Join(engine.sstableDir, "*.sst"))
	if err != nil {
		t.Fatalf("Failed to list SSTable files: %v", err)
	}

	// Log how many files we have before compaction
	t.Logf("Number of SSTable files before compaction: %d", len(sstableFiles))

	// Trigger compaction
	if err := engine.TriggerCompaction(); err != nil {
		t.Fatalf("Failed to trigger compaction: %v", err)
	}

	// Sleep to give compaction time to complete
	time.Sleep(200 * time.Millisecond)

	// Verify deleted keys are still not accessible
	for i := 0; i < 5; i++ {
		key := []byte(fmt.Sprintf("key-%d", i))

		_, err := engine.Get(key)
		if err != ErrKeyNotFound {
			t.Errorf("Expected key %s to be deleted, but got: %v", key, err)
		}
	}

	// Verify non-deleted keys are still accessible
	for i := 5; i < 10; i++ {
		key := []byte(fmt.Sprintf("key-%d", i))
		expectedValue := []byte(fmt.Sprintf("value-%d", i))

		value, err := engine.Get(key)
		if err != nil {
			t.Errorf("Failed to get key %s: %v", key, err)
			continue
		}

		if !bytes.Equal(value, expectedValue) {
			t.Errorf("Got incorrect value for key %s. Expected: %s, Got: %s",
				string(key), string(expectedValue), string(value))
		}
	}

	// Close the engine
	if err := engine.Close(); err != nil {
		t.Fatalf("Failed to close engine: %v", err)
	}
}
@ -9,6 +9,7 @@ import (
	"sync/atomic"
	"time"

	"git.canoozie.net/jer/go-storage/pkg/compaction"
	"git.canoozie.net/jer/go-storage/pkg/config"
	"git.canoozie.net/jer/go-storage/pkg/memtable"
	"git.canoozie.net/jer/go-storage/pkg/sstable"

@ -45,6 +46,9 @@ type Engine struct {
	// Storage layer
	sstables []*sstable.Reader

	// Compaction
	compactionMgr *compaction.CompactionManager

	// State management
	nextFileNum uint64
	lastSeqNum  uint64

@ -119,6 +123,11 @@ func NewEngine(dataDir string) (*Engine, error) {
	// Start background flush goroutine
	go e.backgroundFlush()

	// Initialize compaction
	if err := e.setupCompaction(); err != nil {
		return nil, fmt.Errorf("failed to set up compaction: %w", err)
	}

	return e, nil
}

@ -174,6 +183,10 @@ func (e *Engine) Get(key []byte) ([]byte, error) {
	for i := len(e.sstables) - 1; i >= 0; i-- {
		val, err := e.sstables[i].Get(key)
		if err == nil {
			// If val is nil, it's a tombstone marker - the key exists but was deleted
			if val == nil {
				return nil, ErrKeyNotFound
			}
			return val, nil
		}
		if !errors.Is(err, sstable.ErrNotFound) {

@ -353,6 +366,9 @@ func (e *Engine) flushMemTable(mem *memtable.MemTable) error {
	e.sstables = append(e.sstables, reader)
	e.mu.Unlock()

	// Maybe trigger compaction after flushing
	e.maybeScheduleCompaction()

	return nil
}

@ -452,6 +468,11 @@ func (e *Engine) Close() error {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Shutdown compaction manager
	if err := e.shutdownCompaction(); err != nil {
		return fmt.Errorf("failed to shutdown compaction: %w", err)
	}

	// Close WAL first
	if err := e.wal.Close(); err != nil {
		return fmt.Errorf("failed to close WAL: %w", err)
@ -164,3 +164,18 @@ func (p *MemTablePool) GetMemTables() []*MemTable {
	result = append(result, p.immutables...)
	return result
}

// TotalSize returns the total approximate size of all memtables in the pool
func (p *MemTablePool) TotalSize() int64 {
	p.mu.RLock()
	defer p.mu.RUnlock()

	var total int64
	total += p.active.ApproximateSize()

	for _, m := range p.immutables {
		total += m.ApproximateSize()
	}

	return total
}
9 pkg/sstable/reader.go Normal file
@ -0,0 +1,9 @@
package sstable

// GetKeyCount returns the estimated number of keys in the SSTable
func (r *Reader) GetKeyCount() int {
	r.mu.RLock()
	defer r.mu.RUnlock()

	return int(r.numEntries)
}