kevo/pkg/sstable/block/block_builder.go
Jeremy Tregunna 6fc3be617d
Some checks failed
Go Tests / Run Tests (1.24.2) (push) Has been cancelled
feat: Initial release of kevo storage engine.
Adds a complete LSM-based storage engine with these features:
- Single-writer based architecture for the storage engine
- WAL for durability, and hey it's configurable
- MemTable with skip list implementation for fast reads/writes
- SSTable with block-based structure for on-disk level-based storage
- Background compaction with tiered strategy
- ACID transactions
- Good documentation (I hope)
2025-04-20 14:06:50 -06:00

225 lines
5.9 KiB
Go

package block
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"github.com/cespare/xxhash/v2"
)
// Builder accumulates key-value entries in memory and serializes them into a
// single block via Finish. Keys must be passed to Add in strictly increasing
// order.
type Builder struct {
// entries holds the builder's own copies of every key-value pair added so far.
entries []Entry
// restartPoints holds the offsets recorded by Add, taken from the running
// size estimate. Finish discards them and records the real byte offsets
// while serializing.
restartPoints []uint32
// restartCount is set to zero by NewBuilder and Reset.
// NOTE(review): no visible code reads or increments this field — confirm
// whether it is still needed.
restartCount uint32
// currentSize is the running estimate of the entry data size in bytes:
// key length + value length + 8 for every entry added.
currentSize uint32
// lastKey is a copy of the most recently added key; Add compares against it
// to enforce ordering.
lastKey []byte
// restartIdx counts the entries added since Add last recorded a restart point.
restartIdx int
}
// NewBuilder returns an empty Builder whose entry and restart-point slices
// are pre-sized for a full block: MaxBlockEntries entries and one restart
// point per RestartInterval entries, plus one.
func NewBuilder() *Builder {
	b := new(Builder)
	// The counters and lastKey start at their zero values; only the slices
	// need explicit capacity.
	b.entries = make([]Entry, 0, MaxBlockEntries)
	b.restartPoints = make([]uint32, 0, MaxBlockEntries/RestartInterval+1)
	return b
}
// Add appends a key-value pair to the block.
//
// Keys must be added in strictly increasing byte order; a key that compares
// less than or equal to the previously added key is rejected. Keys longer
// than 65535 bytes and values longer than 4294967295 bytes are rejected as
// well: Finish stores those lengths in uint16 and uint32 fields, so a larger
// length would be silently truncated and the serialized block would be
// corrupt.
//
// Both key and value are copied, so the caller may reuse its buffers once Add
// returns. When an error is returned the builder is left unchanged.
func (b *Builder) Add(key, value []byte) error {
	const (
		maxKeyLen   = 1<<16 - 1 // largest length representable in the uint16 key-length field
		maxValueLen = 1<<32 - 1 // largest length representable in the uint32 value-length field
	)

	// Ensure keys are added in sorted order.
	if len(b.entries) > 0 && bytes.Compare(key, b.lastKey) <= 0 {
		return fmt.Errorf("keys must be added in strictly increasing order, got %s after %s",
			string(key), string(b.lastKey))
	}
	if len(key) > maxKeyLen {
		return fmt.Errorf("key length %d exceeds maximum of %d bytes", len(key), maxKeyLen)
	}
	if uint64(len(value)) > maxValueLen {
		return fmt.Errorf("value length %d exceeds maximum of %d bytes", len(value), uint64(maxValueLen))
	}

	// Copy the inputs so the block never references caller-owned memory. The
	// key copy is shared between the stored entry and lastKey; a second copy
	// would buy nothing because neither is modified by the builder.
	keyCopy := append([]byte(nil), key...)
	b.entries = append(b.entries, Entry{
		Key:   keyCopy,
		Value: append([]byte(nil), value...),
	})

	// Record a restart point for the first entry and then once every
	// RestartInterval entries. The offset is the size estimate *before* this
	// entry is counted.
	if b.restartIdx == 0 || b.restartIdx >= RestartInterval {
		b.restartPoints = append(b.restartPoints, b.currentSize)
		b.restartIdx = 0
	}
	b.restartIdx++

	// Track the estimated size: payload plus 8 bytes of per-entry metadata.
	b.currentSize += uint32(len(key) + len(value) + 8)
	b.lastKey = keyCopy
	return nil
}
// GetEntries returns the entries currently held by the builder.
// The result is the builder's internal slice, not a copy, so it shares
// storage with the builder.
func (b *Builder) GetEntries() []Entry {
return b.entries
}
// Reset returns the builder to its empty state so it can be reused for
// another block. The entry and restart-point slices are truncated to length
// zero rather than reallocated, so their backing arrays are retained; every
// other field goes back to its zero value.
func (b *Builder) Reset() {
	*b = Builder{
		entries:       b.entries[:0],
		restartPoints: b.restartPoints[:0],
	}
}
// EstimatedSize returns the approximate serialized size of the block in
// bytes. An empty builder reports 0; otherwise the result is the tracked
// entry size plus 4 bytes per recorded restart point plus BlockFooterSize.
func (b *Builder) EstimatedSize() uint32 {
	const restartPointWidth = 4 // each restart point is serialized as a uint32

	if len(b.entries) > 0 {
		restartBytes := uint32(len(b.restartPoints) * restartPointWidth)
		total := b.currentSize
		total += restartBytes
		total += BlockFooterSize
		return total
	}
	return 0
}
// Entries returns the number of entries currently held by the builder.
func (b *Builder) Entries() int {
return len(b.entries)
}
// Finish serializes the block and hands it to w in a single Write call.
//
// Serialized layout, all integers little-endian:
//
//	restart entry:     [key length uint16][key][value length uint32][value]
//	other entries:     [shared prefix length uint16][unshared length uint16]
//	                   [unshared key bytes][value length uint32][value]
//	restart offsets:   one uint32 per restart point
//	restart count:     uint32
//	checksum:          uint64 xxhash of every byte that precedes it
//
// The first entry and every RestartInterval-th entry after it are restart
// entries that carry their full key; the entries in between store only the
// suffix that differs from the previous key.
//
// Finish returns the checksum on success. It returns an error when the
// builder is empty, when a key or value is too long for its length field
// (the length would otherwise be truncated silently and corrupt the block),
// or when w fails or accepts fewer bytes than the block contains. The
// builder's entries and restart points are rewritten during serialization;
// call Reset before reusing the builder.
func (b *Builder) Finish(w io.Writer) (uint64, error) {
	const (
		maxKeyLen   = 1<<16 - 1 // key lengths are stored as uint16
		maxValueLen = 1<<32 - 1 // value lengths are stored as uint32
	)

	if len(b.entries) == 0 {
		return 0, fmt.Errorf("cannot finish empty block")
	}

	// Validate every entry before touching builder state, so a rejected
	// block leaves the builder exactly as it was.
	for i := range b.entries {
		if n := len(b.entries[i].Key); n > maxKeyLen {
			return 0, fmt.Errorf("entry %d: key length %d exceeds maximum of %d bytes", i, n, maxKeyLen)
		}
		if n := len(b.entries[i].Value); uint64(n) > maxValueLen {
			return 0, fmt.Errorf("entry %d: value length %d exceeds maximum of %d bytes", i, n, uint64(maxValueLen))
		}
	}

	// Keys arrive sorted because Add enforces it. Collapse adjacent
	// duplicate keys, keeping the last value; this is a defensive pass since
	// Add already rejects duplicates.
	if len(b.entries) > 1 {
		uniqueEntries := make([]Entry, 0, len(b.entries))
		for i := 0; i < len(b.entries); i++ {
			if i > 0 && bytes.Equal(b.entries[i].Key, b.entries[i-1].Key) {
				// Overwrite the previous entry so the latest value wins.
				uniqueEntries[len(uniqueEntries)-1] = b.entries[i]
			} else {
				uniqueEntries = append(uniqueEntries, b.entries[i])
			}
		}
		b.entries = uniqueEntries
	}

	// The offsets recorded by Add are estimates; rebuild them from the real
	// byte positions. The first entry is always a restart point at offset 0.
	b.restartPoints = b.restartPoints[:0]
	b.restartPoints = append(b.restartPoints, 0)

	// Appending to a byte slice cannot fail, so the encoding below has no
	// error paths to check or ignore.
	buf := make([]byte, 0, b.EstimatedSize())
	var prevKey []byte
	sinceRestart := 0
	for i, entry := range b.entries {
		isRestart := i == 0 || sinceRestart >= RestartInterval
		if isRestart {
			sinceRestart = 0
			if i > 0 {
				b.restartPoints = append(b.restartPoints, uint32(len(buf)))
			}
			// Restart entries store the full key.
			buf = binary.LittleEndian.AppendUint16(buf, uint16(len(entry.Key)))
			buf = append(buf, entry.Key...)
		} else {
			// Other entries are delta-encoded against the previous key.
			shared := 0
			for shared < len(prevKey) && shared < len(entry.Key) && prevKey[shared] == entry.Key[shared] {
				shared++
			}
			buf = binary.LittleEndian.AppendUint16(buf, uint16(shared))
			buf = binary.LittleEndian.AppendUint16(buf, uint16(len(entry.Key)-shared))
			buf = append(buf, entry.Key[shared:]...)
		}

		buf = binary.LittleEndian.AppendUint32(buf, uint32(len(entry.Value)))
		buf = append(buf, entry.Value...)

		prevKey = entry.Key
		sinceRestart++
	}

	// Restart point offsets followed by their count.
	for _, point := range b.restartPoints {
		buf = binary.LittleEndian.AppendUint32(buf, point)
	}
	buf = binary.LittleEndian.AppendUint32(buf, uint32(len(b.restartPoints)))

	// The checksum covers everything written so far and is appended last.
	checksum := xxhash.Sum64(buf)
	buf = binary.LittleEndian.AppendUint64(buf, checksum)

	n, err := w.Write(buf)
	if err != nil {
		return 0, fmt.Errorf("failed to write block: %w", err)
	}
	if n != len(buf) {
		return 0, fmt.Errorf("wrote incomplete block: %d of %d bytes", n, len(buf))
	}
	return checksum, nil
}