kevo/pkg/sstable/reader.go
Jeremy Tregunna 7e226825df
All checks were successful
Go Tests / Run Tests (1.24.2) (push) Successful in 9m48s
fix: engine refactor bugfix fest, go fmt
2025-04-25 23:36:08 -06:00

481 lines
12 KiB
Go

package sstable
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"sync"
bloomfilter "github.com/KevoDB/kevo/pkg/bloom_filter"
"github.com/KevoDB/kevo/pkg/sstable/block"
"github.com/KevoDB/kevo/pkg/sstable/footer"
)
// IOManager handles file I/O operations for SSTable
type IOManager struct {
path string
file *os.File
fileSize int64
mu sync.RWMutex
}
// NewIOManager creates a new IOManager for the given file path
func NewIOManager(path string) (*IOManager, error) {
file, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open file: %w", err)
}
// Get file size
stat, err := file.Stat()
if err != nil {
file.Close()
return nil, fmt.Errorf("failed to stat file: %w", err)
}
return &IOManager{
path: path,
file: file,
fileSize: stat.Size(),
}, nil
}
// ReadAt reads data from the file at the given offset
func (io *IOManager) ReadAt(data []byte, offset int64) (int, error) {
io.mu.RLock()
defer io.mu.RUnlock()
if io.file == nil {
return 0, fmt.Errorf("file is closed")
}
return io.file.ReadAt(data, offset)
}
// GetFileSize returns the size of the file
func (io *IOManager) GetFileSize() int64 {
io.mu.RLock()
defer io.mu.RUnlock()
return io.fileSize
}
// Close closes the file
func (io *IOManager) Close() error {
io.mu.Lock()
defer io.mu.Unlock()
if io.file == nil {
return nil
}
err := io.file.Close()
io.file = nil
return err
}
// BlockFetcher abstracts the fetching of data blocks
type BlockFetcher struct {
	io *IOManager
}

// NewBlockFetcher creates a new BlockFetcher
func NewBlockFetcher(io *IOManager) *BlockFetcher {
	return &BlockFetcher{io: io}
}

// FetchBlock reads and parses a data block at the given offset and size
func (bf *BlockFetcher) FetchBlock(offset uint64, size uint32) (*block.Reader, error) {
	// Read the raw block bytes from disk.
	buf := make([]byte, size)
	n, err := bf.io.ReadAt(buf, int64(offset))
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to read data block at offset %d: %w", offset, err)
	case n != int(size):
		// A short read without an error still means the block is unusable.
		return nil, fmt.Errorf("incomplete block read: got %d bytes, expected %d: %w",
			n, size, ErrCorruption)
	}

	// Hand the bytes to the block package for parsing.
	reader, err := block.NewReader(buf)
	if err != nil {
		return nil, fmt.Errorf("failed to create block reader for block at offset %d: %w",
			offset, err)
	}
	return reader, nil
}
// BlockLocator represents an index entry pointing to a data block
type BlockLocator struct {
	Offset uint64
	Size   uint32
	Key    []byte
}

// ParseBlockLocator extracts block location information from an index entry
func ParseBlockLocator(key, value []byte) (BlockLocator, error) {
	// The value must carry at least an offset (8 bytes) plus a size (4 bytes).
	const headerLen = 12
	if len(value) < headerLen {
		return BlockLocator{}, fmt.Errorf("invalid index entry (too short, length=%d): %w",
			len(value), ErrCorruption)
	}

	return BlockLocator{
		Offset: binary.LittleEndian.Uint64(value[:8]),
		Size:   binary.LittleEndian.Uint32(value[8:12]),
		Key:    key,
	}, nil
}
// BlockCache is a simple LRU cache for data blocks
type BlockCache struct {
	blocks    map[uint64]*block.Reader
	maxBlocks int

	// Eviction currently relies on map iteration order (effectively random);
	// a true LRU would need a linked list or similar recency structure.
	mu sync.RWMutex
}

// NewBlockCache creates a new block cache with the specified capacity
func NewBlockCache(capacity int) *BlockCache {
	c := &BlockCache{
		blocks:    make(map[uint64]*block.Reader),
		maxBlocks: capacity,
	}
	return c
}

// Get retrieves a block from the cache
func (c *BlockCache) Get(offset uint64) (*block.Reader, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	rdr, ok := c.blocks[offset]
	return rdr, ok
}

// Put adds a block to the cache
func (c *BlockCache) Put(offset uint64, rdr *block.Reader) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// At capacity: drop one arbitrary entry to make room.
	if len(c.blocks) >= c.maxBlocks {
		for victim := range c.blocks {
			delete(c.blocks, victim)
			break
		}
	}
	c.blocks[offset] = rdr
}
// BlockBloomFilter associates a bloom filter with a block offset
type BlockBloomFilter struct {
	// blockOffset is the file offset of the data block this filter covers,
	// matched against BlockLocator.Offset during lookups.
	blockOffset uint64
	// filter answers approximate membership queries for keys in that block.
	filter *bloomfilter.BloomFilter
}
// Reader reads an SSTable file
type Reader struct {
	ioManager    *IOManager    // file handle and size bookkeeping
	blockFetcher *BlockFetcher // reads and parses data blocks on demand
	indexOffset  uint64        // copied from the footer
	indexSize    uint32        // copied from the footer
	numEntries   uint32        // total entry count recorded in the footer
	indexBlock   *block.Reader // fully loaded index block, kept in memory
	ft           *footer.Footer
	mu           sync.RWMutex
	// Add block cache
	blockCache *BlockCache
	// Add bloom filters; one entry per data block that has a filter.
	bloomFilters []BlockBloomFilter
	// hasBloomFilter is true when the footer records a bloom filter section.
	hasBloomFilter bool
}
// OpenReader opens an SSTable file for reading.
//
// It validates the file size, decodes the footer, loads the index block into
// memory, and (when present) loads per-block bloom filters. On any failure the
// underlying file is closed before the error is returned.
func OpenReader(path string) (*Reader, error) {
	ioManager, err := NewIOManager(path)
	if err != nil {
		return nil, err
	}

	fileSize := ioManager.GetFileSize()

	// Ensure file is large enough for a footer
	if fileSize < int64(footer.FooterSize) {
		ioManager.Close()
		return nil, fmt.Errorf("file too small to be valid SSTable: %d bytes", fileSize)
	}

	// Read footer (fixed size, located at the very end of the file)
	footerData := make([]byte, footer.FooterSize)
	_, err = ioManager.ReadAt(footerData, fileSize-int64(footer.FooterSize))
	if err != nil {
		ioManager.Close()
		return nil, fmt.Errorf("failed to read footer: %w", err)
	}

	ft, err := footer.Decode(footerData)
	if err != nil {
		ioManager.Close()
		return nil, fmt.Errorf("failed to decode footer: %w", err)
	}

	blockFetcher := NewBlockFetcher(ioManager)

	// Read index block at the position recorded in the footer
	indexData := make([]byte, ft.IndexSize)
	_, err = ioManager.ReadAt(indexData, int64(ft.IndexOffset))
	if err != nil {
		ioManager.Close()
		return nil, fmt.Errorf("failed to read index block: %w", err)
	}

	indexBlock, err := block.NewReader(indexData)
	if err != nil {
		ioManager.Close()
		return nil, fmt.Errorf("failed to create index block reader: %w", err)
	}

	// Initialize reader with basic fields
	reader := &Reader{
		ioManager:      ioManager,
		blockFetcher:   blockFetcher,
		indexOffset:    ft.IndexOffset,
		indexSize:      ft.IndexSize,
		numEntries:     ft.NumEntries,
		indexBlock:     indexBlock,
		ft:             ft,
		blockCache:     NewBlockCache(100), // Cache up to 100 blocks by default
		bloomFilters:   make([]BlockBloomFilter, 0),
		hasBloomFilter: ft.BloomFilterOffset > 0 && ft.BloomFilterSize > 0,
	}

	// Load bloom filters if they exist
	if reader.hasBloomFilter {
		if err := loadBloomFilters(reader, ioManager, ft); err != nil {
			ioManager.Close()
			return nil, err
		}
	}

	return reader, nil
}

// loadBloomFilters reads the serialized bloom filter section described by the
// footer and appends one BlockBloomFilter per successfully loaded filter to
// reader.bloomFilters. Individual filters that fail to load are skipped;
// only a failure to read the section itself is returned as an error.
//
// Section layout (repeated): block offset (8 bytes LE) + filter size
// (4 bytes LE) + filter payload.
func loadBloomFilters(reader *Reader, ioManager *IOManager, ft *footer.Footer) error {
	bloomFilterData := make([]byte, ft.BloomFilterSize)
	_, err := ioManager.ReadAt(bloomFilterData, int64(ft.BloomFilterOffset))
	if err != nil {
		return fmt.Errorf("failed to read bloom filter data: %w", err)
	}

	var pos uint32 = 0
	for pos < ft.BloomFilterSize {
		// Read the block offset and filter size
		if pos+12 > ft.BloomFilterSize {
			break // Not enough data for header
		}
		blockOffset := binary.LittleEndian.Uint64(bloomFilterData[pos : pos+8])
		filterSize := binary.LittleEndian.Uint32(bloomFilterData[pos+8 : pos+12])
		pos += 12

		// Ensure we have enough data for the filter payload
		if pos+filterSize > ft.BloomFilterSize {
			break
		}

		filter := loadSingleBloomFilter(bloomFilterData[pos : pos+filterSize])
		// BUGFIX: always advance past the payload, even when loading failed.
		// Previously a failed load continued without advancing, so the next
		// iteration misread filter bytes as a header and corrupted parsing of
		// every subsequent filter entry.
		pos += filterSize

		if filter == nil {
			continue // Skip filters that could not be loaded
		}
		reader.bloomFilters = append(reader.bloomFilters, BlockBloomFilter{
			blockOffset: blockOffset,
			filter:      filter,
		})
	}
	return nil
}

// loadSingleBloomFilter round-trips one serialized filter through a temporary
// file, since the bloomfilter package exposes only file-based loading.
// Returns nil when the filter cannot be loaded for any reason.
func loadSingleBloomFilter(data []byte) *bloomfilter.BloomFilter {
	tempFile, err := os.CreateTemp("", "bloom-filter-*.tmp")
	if err != nil {
		return nil // Skip this filter if we can't create a temp file
	}
	tempPath := tempFile.Name()
	defer os.Remove(tempPath) // Clean up temp file

	_, err = tempFile.Write(data)
	tempFile.Close()
	if err != nil {
		return nil
	}

	filter, err := bloomfilter.LoadBloomFilter(tempPath)
	if err != nil {
		return nil // Skip this filter
	}
	return filter
}
// FindBlockForKey finds the block that might contain the given key
func (r *Reader) FindBlockForKey(key []byte) ([]BlockLocator, error) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	var result []BlockLocator
	visited := make(map[uint64]bool)

	// Binary-search the index for the first entry at or past the target key.
	// NOTE(review): this assumes index keys are ordered such that the block
	// holding the key is at or after the seek position — verify against the
	// writer's index layout.
	it := r.indexBlock.Iterator()
	it.Seek(key)

	// A failed seek leaves the iterator invalid; fall back to scanning the
	// whole index from the beginning.
	if !it.Valid() {
		it.SeekToFirst()
	}

	// Collect every candidate block from the current position onward,
	// de-duplicating by block offset and skipping unparsable entries.
	for ; it.Valid(); it.Next() {
		loc, err := ParseBlockLocator(it.Key(), it.Value())
		if err != nil {
			continue
		}
		if visited[loc.Offset] {
			continue
		}
		visited[loc.Offset] = true
		result = append(result, loc)
	}

	return result, nil
}
// SearchBlockForKey searches for a key within a specific block
func (r *Reader) SearchBlockForKey(blockReader *block.Reader, key []byte) ([]byte, bool) {
	it := blockReader.Iterator()

	// Fast path: seek straight to the key via binary search.
	if it.Seek(key) && bytes.Equal(it.Key(), key) {
		return it.Value(), true
	}

	// Fallback: exhaustive linear scan of the whole block.
	for it.SeekToFirst(); it.Valid(); it.Next() {
		if bytes.Equal(it.Key(), key) {
			return it.Value(), true
		}
	}

	return nil, false
}
// Get returns the value for a given key
func (r *Reader) Get(key []byte) ([]byte, error) {
	// Collect the candidate blocks that may hold the key.
	candidates, err := r.FindBlockForKey(key)
	if err != nil {
		return nil, err
	}

	for _, locator := range candidates {
		// Consult the block's bloom filter when filters are in use: a
		// negative answer means the key is definitely not in this block.
		if r.hasBloomFilter {
			keep := false
			for _, bf := range r.bloomFilters {
				if bf.blockOffset == locator.Offset {
					keep = bf.filter.Contains(key)
					break
				}
			}
			// A block with no registered filter is skipped as well,
			// mirroring the filter-miss case.
			if !keep {
				continue
			}
		}

		// Serve the block from cache when possible; otherwise fetch from
		// disk and cache it for subsequent lookups.
		var blk *block.Reader
		if cached, ok := r.blockCache.Get(locator.Offset); ok {
			blk = cached
		} else {
			blk, err = r.blockFetcher.FetchBlock(locator.Offset, locator.Size)
			if err != nil {
				return nil, err
			}
			r.blockCache.Put(locator.Offset, blk)
		}

		// Look for the key inside this block.
		if value, ok := r.SearchBlockForKey(blk, key); ok {
			return value, nil
		}
	}

	return nil, ErrNotFound
}
// NewIterator returns an iterator over the entire SSTable
func (r *Reader) NewIterator() *Iterator {
	r.mu.RLock()
	defer r.mu.RUnlock()

	// Give the iterator its own view of the index block.
	idx := r.indexBlock.Iterator()
	// Position at the first entry so iteration starts from a known state.
	idx.SeekToFirst()

	it := &Iterator{
		reader:        r,
		indexIterator: idx,
		dataBlockIter: nil,
		currentBlock:  nil,
		initialized:   false,
	}
	return it
}
// Close closes the SSTable reader
func (r *Reader) Close() error {
	r.mu.Lock()
	defer r.mu.Unlock()

	// File lifetime is owned by the IOManager; delegate to it.
	return r.ioManager.Close()
}
// GetKeyCount returns the estimated number of keys in the SSTable
func (r *Reader) GetKeyCount() int {
	r.mu.RLock()
	defer r.mu.RUnlock()

	// numEntries comes from the footer written when the table was created.
	count := int(r.numEntries)
	return count
}
// FilePath returns the file path of this SSTable
func (r *Reader) FilePath() string {
	r.mu.RLock()
	defer r.mu.RUnlock()

	// The path is captured by the IOManager at open time.
	return r.ioManager.path
}