// kevo/pkg/replication/primary.go

package replication
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/KevoDB/kevo/pkg/common/log"
"github.com/KevoDB/kevo/pkg/wal"
proto "github.com/KevoDB/kevo/proto/kevo/replication"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
)
// Primary implements the primary node functionality for WAL replication.
// It observes WAL entries and serves them to replica nodes.
type Primary struct {
wal *wal.WAL // Reference to the WAL
batcher *WALBatcher // Batches WAL entries for efficient transmission
compressor *CompressionManager // Handles compression/decompression
sessions map[string]*ReplicaSession // Active replica sessions
lastSyncedSeq uint64 // Highest sequence number synced to disk
retentionConfig WALRetentionConfig // Configuration for WAL retention
enableCompression bool // Whether compression is enabled
defaultCodec proto.CompressionCodec // Default compression codec
heartbeat *heartbeatManager // Manages heartbeats and session monitoring
mu sync.RWMutex // Protects sessions map
proto.UnimplementedWALReplicationServiceServer
}
// WALRetentionConfig defines WAL file retention policy
type WALRetentionConfig struct {
MaxAgeHours int // Maximum age of WAL files in hours
MinSequenceKeep uint64 // Minimum sequence number to preserve
}
// PrimaryConfig contains configuration for the primary node
type PrimaryConfig struct {
MaxBatchSizeKB int // Maximum batch size in KB
EnableCompression bool // Whether to enable compression
CompressionCodec proto.CompressionCodec // Compression codec to use
RetentionConfig WALRetentionConfig // WAL retention configuration
RespectTxBoundaries bool // Whether to respect transaction boundaries in batching
HeartbeatConfig *HeartbeatConfig // Configuration for heartbeat/keepalive
}
// DefaultPrimaryConfig returns a default configuration for primary nodes
func DefaultPrimaryConfig() *PrimaryConfig {
return &PrimaryConfig{
MaxBatchSizeKB: 256, // 256KB default batch size
EnableCompression: true,
CompressionCodec: proto.CompressionCodec_ZSTD,
RetentionConfig: WALRetentionConfig{
MaxAgeHours: 24, // Keep WAL files for 24 hours by default
MinSequenceKeep: 0, // No sequence-based retention by default
},
RespectTxBoundaries: true,
HeartbeatConfig: DefaultHeartbeatConfig(),
}
}
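
// Usage sketch (illustrative, not part of the original file): building a
// primary with a tuned configuration. The wal.NewWAL call and its walCfg and
// walDir arguments are hypothetical placeholders for however the WAL is opened
// in this codebase; DefaultPrimaryConfig, the PrimaryConfig fields, NewPrimary,
// and Close are the APIs defined in this file.
//
//    cfg := DefaultPrimaryConfig()
//    cfg.MaxBatchSizeKB = 512             // larger batches for bulk-load workloads
//    cfg.RetentionConfig.MaxAgeHours = 48 // keep WAL files around longer
//
//    w, err := wal.NewWAL(walCfg, walDir) // hypothetical WAL constructor
//    if err != nil {
//        log.Error("failed to open WAL: %v", err)
//        return
//    }
//    primary, err := NewPrimary(w, cfg)
//    if err != nil {
//        log.Error("failed to create primary: %v", err)
//        return
//    }
//    defer primary.Close()
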
// ReplicaSession represents a connected replica
type ReplicaSession struct {
ID string // Unique session ID
StartSequence uint64 // Requested start sequence
Stream proto.WALReplicationService_StreamWALServer // gRPC stream
LastAckSequence uint64 // Last acknowledged sequence
SupportedCodecs []proto.CompressionCodec // Supported compression codecs
Connected bool // Whether the session is connected
Active bool // Whether the session is actively receiving WAL entries
LastActivity time.Time // Time of last activity
ListenerAddress string // Network address (host:port) the replica is listening on
mu sync.Mutex // Protects session state
}
// NewPrimary creates a new primary node for replication
func NewPrimary(w *wal.WAL, config *PrimaryConfig) (*Primary, error) {
if w == nil {
return nil, errors.New("WAL cannot be nil")
}
if config == nil {
config = DefaultPrimaryConfig()
}
// Create compressor
compressor, err := NewCompressionManager()
if err != nil {
return nil, fmt.Errorf("failed to create compressor: %w", err)
}
// Create batcher
batcher := NewWALBatcher(
config.MaxBatchSizeKB,
config.CompressionCodec,
config.RespectTxBoundaries,
)
primary := &Primary{
wal: w,
batcher: batcher,
compressor: compressor,
sessions: make(map[string]*ReplicaSession),
lastSyncedSeq: 0,
retentionConfig: config.RetentionConfig,
enableCompression: config.EnableCompression,
defaultCodec: config.CompressionCodec,
}
// Create heartbeat manager
primary.heartbeat = newHeartbeatManager(primary, config.HeartbeatConfig)
// Register as a WAL observer
w.RegisterObserver("primary_replication", primary)
// Start heartbeat monitoring
primary.heartbeat.start()
return primary, nil
}
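
// Server wiring sketch (illustrative): exposing this primary over gRPC. The
// RegisterWALReplicationServiceServer name follows the usual protoc-gen-go-grpc
// convention for the WALReplicationService service and is assumed here, as are
// the standard net and google.golang.org/grpc imports.
//
//    lis, err := net.Listen("tcp", ":50051")
//    if err != nil {
//        log.Error("listen failed: %v", err)
//        return
//    }
//    grpcServer := grpc.NewServer()
//    proto.RegisterWALReplicationServiceServer(grpcServer, primary)
//    if err := grpcServer.Serve(lis); err != nil {
//        log.Error("serve failed: %v", err)
//    }
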
// OnWALEntryWritten implements WALEntryObserver.OnWALEntryWritten
func (p *Primary) OnWALEntryWritten(entry *wal.Entry) {
log.Info("WAL entry written: seq=%d, type=%d, key=%s",
entry.SequenceNumber, entry.Type, string(entry.Key))
// Add to batch and broadcast if batch is full
batchReady, err := p.batcher.AddEntry(entry)
if err != nil {
// Log error but continue - don't block WAL operations
log.Error("Error adding WAL entry to batch: %v", err)
return
}
if batchReady {
log.Info("Batch ready for broadcast with %d entries", p.batcher.GetBatchCount())
response := p.batcher.GetBatch()
p.broadcastToReplicas(response)
} else {
log.Info("Entry added to batch (not ready for broadcast yet), current count: %d",
p.batcher.GetBatchCount())
// Even if the batch is not technically "ready", force sending if we have entries
// This is particularly important in low-traffic scenarios
if p.batcher.GetBatchCount() > 0 {
log.Info("Forcibly sending partial batch with %d entries", p.batcher.GetBatchCount())
response := p.batcher.GetBatch()
p.broadcastToReplicas(response)
}
}
}
// OnWALBatchWritten implements WALEntryObserver.OnWALBatchWritten
func (p *Primary) OnWALBatchWritten(startSeq uint64, entries []*wal.Entry) {
// Reset batcher to ensure a clean state when processing a batch
p.batcher.Reset()
// Process each entry in the batch
for _, entry := range entries {
ready, err := p.batcher.AddEntry(entry)
if err != nil {
log.Error("Error adding batch entry to replication: %v", err)
continue
}
// If we filled up the batch during processing, send it
if ready {
response := p.batcher.GetBatch()
p.broadcastToReplicas(response)
}
}
// If we have entries in the batch after processing all entries, send them
if p.batcher.GetBatchCount() > 0 {
response := p.batcher.GetBatch()
p.broadcastToReplicas(response)
}
}
// OnWALSync implements WALEntryObserver.OnWALSync
func (p *Primary) OnWALSync(upToSeq uint64) {
p.mu.Lock()
p.lastSyncedSeq = upToSeq
p.mu.Unlock()
// If we have any buffered entries, send them now that they're synced
if p.batcher.GetBatchCount() > 0 {
response := p.batcher.GetBatch()
p.broadcastToReplicas(response)
}
}
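
// Taken together, the three methods above satisfy the WAL's observer contract
// (referred to as WALEntryObserver in the comments in this file). The actual
// interface lives in the wal package; inferred from the signatures used here,
// it has roughly this shape:
//
//    type WALEntryObserver interface {
//        OnWALEntryWritten(entry *wal.Entry)
//        OnWALBatchWritten(startSeq uint64, entries []*wal.Entry)
//        OnWALSync(upToSeq uint64)
//    }
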
// StreamWAL implements WALReplicationServiceServer.StreamWAL
func (p *Primary) StreamWAL(
req *proto.WALStreamRequest,
stream proto.WALReplicationService_StreamWALServer,
) error {
// Note: start_sequence is an unsigned field, so no lower-bound validation is required here
// Create a new session for this replica
sessionID := fmt.Sprintf("replica-%d", time.Now().UnixNano())
// Get the listener address from the request
listenerAddress := req.ListenerAddress
if listenerAddress == "" {
return status.Error(codes.InvalidArgument, "listener_address is required")
}
log.Info("Replica registered with address: %s", listenerAddress)
session := &ReplicaSession{
ID: sessionID,
StartSequence: req.StartSequence,
Stream: stream,
LastAckSequence: req.StartSequence,
SupportedCodecs: []proto.CompressionCodec{proto.CompressionCodec_NONE},
Connected: true,
Active: true,
LastActivity: time.Now(),
ListenerAddress: listenerAddress,
}
// Determine compression support
if req.CompressionSupported {
if req.PreferredCodec != proto.CompressionCodec_NONE {
// Use replica's preferred codec if supported
session.SupportedCodecs = []proto.CompressionCodec{
req.PreferredCodec,
proto.CompressionCodec_NONE, // Always support no compression as fallback
}
} else {
// Replica supports compression but has no preference, use defaults
session.SupportedCodecs = []proto.CompressionCodec{
p.defaultCodec,
proto.CompressionCodec_NONE,
}
}
}
// Register the session
p.registerReplicaSession(session)
defer p.unregisterReplicaSession(session.ID)
// Send the session ID in the response header metadata
// This is critical for the replica to identify itself in future requests
md := metadata.Pairs("session-id", session.ID)
if err := stream.SendHeader(md); err != nil {
log.Error("Failed to send session ID in header: %v", err)
return status.Errorf(codes.Internal, "Failed to send session ID: %v", err)
}
log.Info("Successfully sent session ID %s in stream header", session.ID)
// Send initial entries if starting from a specific sequence
if req.StartSequence > 0 {
if err := p.sendInitialEntries(session); err != nil {
return fmt.Errorf("failed to send initial entries: %w", err)
}
}
// Keep the stream alive and continue sending entries as they arrive
ctx := stream.Context()
// Periodically check if we have more entries to send
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
// Context was canceled, exit
return ctx.Err()
case <-ticker.C:
// Check if we have new entries to send; read LastAckSequence under the
// session lock so this check does not race with acknowledgment updates
currentSeq := p.wal.GetNextSequence() - 1
session.mu.Lock()
lastAck := session.LastAckSequence
session.mu.Unlock()
if currentSeq > lastAck {
log.Info("Checking for new entries: currentSeq=%d > lastAck=%d",
currentSeq, lastAck)
if err := p.sendUpdatedEntries(session); err != nil {
log.Error("Failed to send updated entries: %v", err)
// Don't terminate the stream on error, just continue
}
}
}
}
}
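
// Replica-side sketch of this stream (illustrative): open StreamWAL, read the
// session ID from the response header, then consume batches. The
// proto.NewWALReplicationServiceClient constructor follows the standard
// generated-client convention, and conn and applyEntries are hypothetical;
// the request fields and the "session-id" header key come from this file.
//
//    client := proto.NewWALReplicationServiceClient(conn)
//    stream, err := client.StreamWAL(ctx, &proto.WALStreamRequest{
//        StartSequence:        lastAppliedSeq + 1,
//        ListenerAddress:      "replica-1:50052",
//        CompressionSupported: true,
//        PreferredCodec:       proto.CompressionCodec_ZSTD,
//    })
//    if err != nil {
//        return err
//    }
//    md, err := stream.Header()
//    if err != nil {
//        return err
//    }
//    ids := md.Get("session-id")
//    if len(ids) == 0 {
//        return fmt.Errorf("primary did not send a session ID")
//    }
//    sessionID := ids[0] // attach to later Acknowledge/NegativeAcknowledge calls
//    for {
//        resp, err := stream.Recv()
//        if err != nil {
//            return err
//        }
//        applyEntries(resp.Entries) // hypothetical apply step on the replica
//    }
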
// sendUpdatedEntries sends any new WAL entries to the replica since its last acknowledged sequence
func (p *Primary) sendUpdatedEntries(session *ReplicaSession) error {
// Take the mutex to safely read and update session state
session.mu.Lock()
defer session.mu.Unlock()
// Get the next sequence number we should send
nextSequence := session.LastAckSequence + 1
log.Info("Sending updated entries to replica %s starting from sequence %d",
session.ID, nextSequence)
// Get the next entries from WAL
entries, err := p.getWALEntriesFromSequence(nextSequence)
if err != nil {
return fmt.Errorf("failed to get WAL entries: %w", err)
}
if len(entries) == 0 {
// No new entries, nothing to send
log.Info("No new entries to send to replica %s", session.ID)
return nil
}
// Log what we're sending
log.Info("Sending %d entries to replica %s, sequence range: %d to %d",
len(entries), session.ID, entries[0].SequenceNumber, entries[len(entries)-1].SequenceNumber)
// Convert WAL entries to protocol buffer entries
protoEntries := make([]*proto.WALEntry, 0, len(entries))
for _, entry := range entries {
protoEntry, err := WALEntryToProto(entry, proto.FragmentType_FULL)
if err != nil {
log.Error("Error converting entry %d to proto: %v", entry.SequenceNumber, err)
continue
}
protoEntries = append(protoEntries, protoEntry)
}
// Create a response with the entries
response := &proto.WALStreamResponse{
Entries: protoEntries,
Compressed: false, // For simplicity, not compressing these entries
Codec: proto.CompressionCodec_NONE,
}
// Send to the replica (we're already holding the lock)
if err := session.Stream.Send(response); err != nil {
return fmt.Errorf("failed to send entries: %w", err)
}
log.Info("Successfully sent %d entries to replica %s", len(protoEntries), session.ID)
session.LastActivity = time.Now()
return nil
}
// Acknowledge implements WALReplicationServiceServer.Acknowledge
func (p *Primary) Acknowledge(
ctx context.Context,
req *proto.Ack,
) (*proto.AckResponse, error) {
// Log the acknowledgment request
log.Info("Received acknowledgment request: AcknowledgedUpTo=%d", req.AcknowledgedUpTo)
// Extract metadata for debugging
md, ok := metadata.FromIncomingContext(ctx)
if ok {
sessionIDs := md.Get("session-id")
if len(sessionIDs) > 0 {
log.Info("Acknowledge request contains session ID in metadata: %s", sessionIDs[0])
} else {
log.Warn("Acknowledge request missing session ID in metadata")
}
} else {
log.Warn("No metadata in acknowledge request")
}
// Update session with acknowledgment
sessionID := p.getSessionIDFromContext(ctx)
if sessionID == "" {
log.Error("Failed to identify session for acknowledgment")
return &proto.AckResponse{
Success: false,
Message: "Unknown session",
}, nil
}
log.Info("Using session ID for acknowledgment: %s", sessionID)
// Update the session's acknowledged sequence
if err := p.updateSessionAck(sessionID, req.AcknowledgedUpTo); err != nil {
log.Error("Failed to update acknowledgment: %v", err)
return &proto.AckResponse{
Success: false,
Message: err.Error(),
}, nil
}
log.Info("Successfully processed acknowledgment for session %s up to sequence %d",
sessionID, req.AcknowledgedUpTo)
// Check if we can prune WAL files
p.maybeManageWALRetention()
return &proto.AckResponse{
Success: true,
}, nil
}
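
// Replica-side sketch of the acknowledgment call (illustrative): the session
// ID obtained from the StreamWAL header is attached as gRPC metadata so the
// lookup above can resolve the session. metadata.AppendToOutgoingContext is
// standard grpc metadata usage; client, sessionID, and lastDurableSeq are
// assumed from the replica sketch above.
//
//    ackCtx := metadata.AppendToOutgoingContext(ctx, "session-id", sessionID)
//    resp, err := client.Acknowledge(ackCtx, &proto.Ack{
//        AcknowledgedUpTo: lastDurableSeq, // highest sequence the replica has persisted
//    })
//    if err != nil || !resp.GetSuccess() {
//        log.Warn("acknowledgment not accepted: %v %s", err, resp.GetMessage())
//    }
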
// NegativeAcknowledge implements WALReplicationServiceServer.NegativeAcknowledge
func (p *Primary) NegativeAcknowledge(
ctx context.Context,
req *proto.Nack,
) (*proto.NackResponse, error) {
// Get the session ID from context
sessionID := p.getSessionIDFromContext(ctx)
if sessionID == "" {
return &proto.NackResponse{
Success: false,
Message: "Unknown session",
}, nil
}
// Get the session
session := p.getSession(sessionID)
if session == nil {
return &proto.NackResponse{
Success: false,
Message: "Session not found",
}, nil
}
// Resend WAL entries from the requested sequence
if err := p.resendEntries(session, req.MissingFromSequence); err != nil {
return &proto.NackResponse{
Success: false,
Message: fmt.Sprintf("Failed to resend entries: %v", err),
}, nil
}
return &proto.NackResponse{
Success: true,
}, nil
}
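
// Similarly, a replica that detects a gap in the stream can request a resend
// with a NACK (illustrative sketch; MissingFromSequence is the field used
// above, and the session-id metadata is attached the same way as for
// Acknowledge):
//
//    nackCtx := metadata.AppendToOutgoingContext(ctx, "session-id", sessionID)
//    resp, err := client.NegativeAcknowledge(nackCtx, &proto.Nack{
//        MissingFromSequence: expectedSeq, // first sequence the replica is missing
//    })
//    if err != nil || !resp.GetSuccess() {
//        log.Warn("NACK not accepted: %v %s", err, resp.GetMessage())
//    }
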
// broadcastToReplicas sends a WAL stream response to all connected replicas
func (p *Primary) broadcastToReplicas(response *proto.WALStreamResponse) {
p.mu.RLock()
defer p.mu.RUnlock()
for _, session := range p.sessions {
if !session.Connected || !session.Active {
continue
}
// Check if this session has requested entries from a higher sequence
if len(response.Entries) > 0 &&
response.Entries[0].SequenceNumber <= session.StartSequence {
continue
}
// Send to the replica - it will create a clone inside sendToReplica
p.sendToReplica(session, response)
}
}
// sendToReplica sends a WAL stream response to a specific replica
func (p *Primary) sendToReplica(session *ReplicaSession, response *proto.WALStreamResponse) {
if session == nil || !session.Connected || !session.Active {
return
}
// Make a shallow copy of the response so per-session adjustments
// (such as the codec fallback below) don't alter what other replicas receive
clonedResponse := &proto.WALStreamResponse{
Entries: response.Entries,
Compressed: response.Compressed,
Codec: response.Codec,
}
// Adjust compression based on replica's capabilities
if clonedResponse.Compressed {
codecSupported := false
for _, codec := range session.SupportedCodecs {
if codec == clonedResponse.Codec {
codecSupported = true
break
}
}
if !codecSupported {
// Decompress and use a codec the replica supports
decompressedEntries := make([]*proto.WALEntry, 0, len(clonedResponse.Entries))
for _, entry := range clonedResponse.Entries {
// Copy the entry to avoid modifying the original
decompressedEntry := &proto.WALEntry{
SequenceNumber: entry.SequenceNumber,
FragmentType: entry.FragmentType,
Checksum: entry.Checksum,
}
// Decompress the payload; the response is known to be compressed at this point
decompressed, err := p.compressor.Decompress(entry.Payload, clonedResponse.Codec)
if err != nil {
log.Error("Error decompressing entry: %v", err)
continue
}
decompressedEntry.Payload = decompressed
decompressedEntries = append(decompressedEntries, decompressedEntry)
}
// Update the response with uncompressed entries
clonedResponse.Entries = decompressedEntries
clonedResponse.Compressed = false
clonedResponse.Codec = proto.CompressionCodec_NONE
}
}
// Acquire lock to send to the stream
session.mu.Lock()
defer session.mu.Unlock()
// Send response through the gRPC stream
if err := session.Stream.Send(clonedResponse); err != nil {
log.Error("Error sending to replica %s: %v", session.ID, err)
session.Connected = false
} else {
session.LastActivity = time.Now()
}
}
// sendInitialEntries sends WAL entries from the requested start sequence to a replica
func (p *Primary) sendInitialEntries(session *ReplicaSession) error {
// Get entries from WAL
// Note: This is a simplified approach. A production implementation would:
// 1. Have more efficient retrieval of WAL entries by sequence
// 2. Handle large ranges of entries by sending in batches
// 3. Implement proper error handling for missing WAL files
// For now, we'll use a placeholder implementation
entries, err := p.getWALEntriesFromSequence(session.StartSequence)
if err != nil {
return fmt.Errorf("failed to get WAL entries: %w", err)
}
if len(entries) == 0 {
// No entries to send, that's okay
return nil
}
// Convert WAL entries to protocol buffer entries
protoEntries := make([]*proto.WALEntry, 0, len(entries))
for _, entry := range entries {
protoEntry, err := WALEntryToProto(entry, proto.FragmentType_FULL)
if err != nil {
log.Error("Error converting entry %d to proto: %v", entry.SequenceNumber, err)
continue
}
protoEntries = append(protoEntries, protoEntry)
}
// Create a response with the entries
response := &proto.WALStreamResponse{
Entries: protoEntries,
Compressed: false, // Initial entries are sent uncompressed for simplicity
Codec: proto.CompressionCodec_NONE,
}
// Send to the replica
session.mu.Lock()
defer session.mu.Unlock()
if err := session.Stream.Send(response); err != nil {
return fmt.Errorf("failed to send initial entries: %w", err)
}
session.LastActivity = time.Now()
return nil
}
// resendEntries resends WAL entries from the requested sequence to a replica
func (p *Primary) resendEntries(session *ReplicaSession, fromSequence uint64) error {
// Similar to sendInitialEntries but for handling NACKs
entries, err := p.getWALEntriesFromSequence(fromSequence)
if err != nil {
return fmt.Errorf("failed to get WAL entries: %w", err)
}
if len(entries) == 0 {
return fmt.Errorf("no entries found from sequence %d", fromSequence)
}
// Convert WAL entries to protocol buffer entries
protoEntries := make([]*proto.WALEntry, 0, len(entries))
for _, entry := range entries {
protoEntry, err := WALEntryToProto(entry, proto.FragmentType_FULL)
if err != nil {
log.Error("Error converting entry %d to proto: %v", entry.SequenceNumber, err)
continue
}
protoEntries = append(protoEntries, protoEntry)
}
// Create a response with the entries
response := &proto.WALStreamResponse{
Entries: protoEntries,
Compressed: false, // Resent entries are uncompressed for simplicity
Codec: proto.CompressionCodec_NONE,
}
// Send to the replica
session.mu.Lock()
defer session.mu.Unlock()
if err := session.Stream.Send(response); err != nil {
return fmt.Errorf("failed to resend entries: %w", err)
}
session.LastActivity = time.Now()
return nil
}
// getWALEntriesFromSequence retrieves WAL entries starting from the specified sequence,
// returning at most maxEntriesToReturn entries per call; callers page through larger
// ranges by advancing the sequence and calling again
func (p *Primary) getWALEntriesFromSequence(fromSequence uint64) ([]*wal.Entry, error) {
p.mu.RLock()
defer p.mu.RUnlock()
// Get current sequence in WAL (next sequence - 1)
// We subtract 1 to get the current highest assigned sequence
currentSeq := p.wal.GetNextSequence() - 1
log.Info("GetWALEntriesFromSequence called with fromSequence=%d, currentSeq=%d",
fromSequence, currentSeq)
if currentSeq == 0 || fromSequence > currentSeq {
// No entries to return yet
log.Info("No entries to return: currentSeq=%d, fromSequence=%d", currentSeq, fromSequence)
return []*wal.Entry{}, nil
}
// Use the WAL's built-in method to get entries starting from the specified sequence
// This preserves the original keys and values exactly as they were written
allEntries, err := p.wal.GetEntriesFrom(fromSequence)
if err != nil {
log.Error("Failed to get WAL entries: %v", err)
return nil, fmt.Errorf("failed to get WAL entries: %w", err)
}
log.Info("Retrieved %d entries from WAL starting at sequence %d", len(allEntries), fromSequence)
// Debugging: Log entry details
for i, entry := range allEntries {
if i < 5 { // Only log first few entries to avoid excessive logging
log.Info("Entry %d: seq=%d, type=%d, key=%s",
i, entry.SequenceNumber, entry.Type, string(entry.Key))
}
}
// Limit the number of entries to return to avoid overwhelming the network
maxEntriesToReturn := 100
if len(allEntries) > maxEntriesToReturn {
allEntries = allEntries[:maxEntriesToReturn]
log.Info("Limited entries to %d for network efficiency", maxEntriesToReturn)
}
log.Info("Returning %d entries starting from sequence %d", len(allEntries), fromSequence)
return allEntries, nil
}
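
// Because this helper returns at most maxEntriesToReturn entries per call, a
// caller that needs a larger backlog pages through it by advancing the
// sequence past the last entry returned. A minimal sketch of that pattern
// (hypothetical caller; sendBatch is a placeholder):
//
//    seq := startSeq
//    for {
//        entries, err := p.getWALEntriesFromSequence(seq)
//        if err != nil || len(entries) == 0 {
//            break
//        }
//        sendBatch(entries) // placeholder for converting and streaming the batch
//        seq = entries[len(entries)-1].SequenceNumber + 1
//    }
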
// registerReplicaSession adds a new replica session
func (p *Primary) registerReplicaSession(session *ReplicaSession) {
p.mu.Lock()
defer p.mu.Unlock()
p.sessions[session.ID] = session
log.Info("Registered new replica session: %s starting from sequence %d",
session.ID, session.StartSequence)
}
// unregisterReplicaSession removes a replica session
func (p *Primary) unregisterReplicaSession(id string) {
p.mu.Lock()
defer p.mu.Unlock()
if _, exists := p.sessions[id]; exists {
delete(p.sessions, id)
log.Info("Unregistered replica session: %s", id)
}
}
// getSessionIDFromContext extracts the session ID from the gRPC context
// Note: In a real implementation, this would use proper authentication and session tracking
func (p *Primary) getSessionIDFromContext(ctx context.Context) string {
// Check for session ID in metadata (would be set by a proper authentication system)
md, ok := metadata.FromIncomingContext(ctx)
if ok {
// Look for session ID in metadata
sessionIDs := md.Get("session-id")
if len(sessionIDs) > 0 {
sessionID := sessionIDs[0]
log.Info("Found session ID in metadata: %s", sessionID)
// Verify the session exists
p.mu.RLock()
defer p.mu.RUnlock()
if _, exists := p.sessions[sessionID]; exists {
return sessionID
}
log.Error("Session ID from metadata not found in sessions map: %s", sessionID)
return ""
}
}
// Fallback to first active session approach
p.mu.RLock()
defer p.mu.RUnlock()
// Log the available sessions for debugging
log.Info("Looking for active session in %d available sessions", len(p.sessions))
for id, session := range p.sessions {
log.Info("Session %s: connected=%v, active=%v, lastAck=%d",
id, session.Connected, session.Active, session.LastAckSequence)
}
// Return the first active session ID (this is just a placeholder)
for id, session := range p.sessions {
if session.Connected {
log.Info("Selected active session %s", id)
return id
}
}
log.Error("No active session found")
return ""
}
// updateSessionAck updates a session's acknowledged sequence
func (p *Primary) updateSessionAck(sessionID string, ackSeq uint64) error {
p.mu.Lock()
defer p.mu.Unlock()
session, exists := p.sessions[sessionID]
if !exists {
return fmt.Errorf("session %s not found", sessionID)
}
// We need to lock the session to safely update LastAckSequence
session.mu.Lock()
defer session.mu.Unlock()
// Log the updated acknowledgement
log.Info("Updating replica %s acknowledgement: previous=%d, new=%d",
sessionID, session.LastAckSequence, ackSeq)
// Only update if the new ack sequence is higher than the current one
if ackSeq > session.LastAckSequence {
session.LastAckSequence = ackSeq
log.Info("Replica %s acknowledged data up to sequence %d", sessionID, ackSeq)
} else {
log.Warn("Received outdated acknowledgement from replica %s: got=%d, current=%d",
sessionID, ackSeq, session.LastAckSequence)
}
session.LastActivity = time.Now()
return nil
}
// getSession retrieves a session by ID
func (p *Primary) getSession(id string) *ReplicaSession {
p.mu.RLock()
defer p.mu.RUnlock()
return p.sessions[id]
}
// maybeManageWALRetention checks if WAL retention management should be triggered
func (p *Primary) maybeManageWALRetention() {
p.mu.RLock()
// Find minimum acknowledged sequence across all connected replicas
minAcknowledgedSeq := uint64(^uint64(0)) // Start with max value
activeReplicas := 0
for id, session := range p.sessions {
if session.Connected && session.Active {
activeReplicas++
if session.LastAckSequence < minAcknowledgedSeq {
minAcknowledgedSeq = session.LastAckSequence
}
log.Info("Replica %s has acknowledged up to sequence %d",
id, session.LastAckSequence)
}
}
p.mu.RUnlock()
// Only proceed if we have valid data and active replicas
if minAcknowledgedSeq == uint64(^uint64(0)) || minAcknowledgedSeq == 0 {
log.Info("No minimum acknowledged sequence found, skipping WAL retention")
return
}
log.Info("WAL retention: minimum acknowledged sequence across %d active replicas: %d",
activeReplicas, minAcknowledgedSeq)
// Apply the retention policy using the existing WAL API
config := wal.WALRetentionConfig{
MaxAge: time.Duration(p.retentionConfig.MaxAgeHours) * time.Hour,
MinSequenceKeep: minAcknowledgedSeq,
}
filesDeleted, err := p.wal.ManageRetention(config)
if err != nil {
log.Error("Failed to manage WAL retention: %v", err)
return
}
if filesDeleted > 0 {
log.Info("WAL retention: deleted %d files, min sequence kept: %d",
filesDeleted, minAcknowledgedSeq)
} else {
log.Info("WAL retention: no files eligible for deletion")
}
}
// Close shuts down the primary, unregistering from WAL and cleaning up resources
func (p *Primary) Close() error {
// Stop heartbeat monitoring
if p.heartbeat != nil {
p.heartbeat.stop()
}
// Unregister from WAL
p.wal.UnregisterObserver("primary_replication")
// Close all replica sessions
p.mu.Lock()
for id := range p.sessions {
session := p.sessions[id]
session.Connected = false
session.Active = false
}
p.sessions = make(map[string]*ReplicaSession)
p.mu.Unlock()
// Close the compressor
if p.compressor != nil {
p.compressor.Close()
}
return nil
}
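
// Shutdown sketch (illustrative, using the hypothetical grpcServer from the
// wiring sketch near NewPrimary): because StreamWAL streams are long-lived,
// stopping the gRPC server first cancels their contexts, letting each
// handler's ctx.Done() case fire before the primary itself is closed.
//
//    grpcServer.Stop() // tears down active StreamWAL stream contexts
//    if err := primary.Close(); err != nil {
//        log.Error("error closing primary: %v", err)
//    }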