- JetStreamMetaGroup: validated proposals, inflight tracking, consumer counting, ApplyEntry dispatch - StreamReplicaGroup: ProposeMessageAsync, LeaderChanged event, message/sequence tracking, GetStatus - PlacementEngine tests: cluster affinity, tag filtering, storage ordering (16 tests) - Assignment serialization tests: quorum calc, has-quorum, property defaults (16 tests) - MetaGroup proposal tests: stream/consumer CRUD, leader validation, inflight (30 tests) - StreamRaftGroup tests: message proposals, step-down events, status (10 tests) - RAFT Go parity tests + JetStream cluster Go parity tests (partial B11 pre-work)
301 lines
10 KiB
C#
301 lines
10 KiB
C#
using NATS.Server.Raft;
|
|
|
|
namespace NATS.Server.JetStream.Cluster;
|
|
|
|
/// <summary>
/// In-process model of the RAFT group backing a single replicated stream. Owns the
/// <see cref="RaftNode"/> replicas, tracks which one is currently treated as leader, and
/// keeps simple message-count / last-sequence counters for proposed messages.
/// Go reference: jetstream_cluster.go (per-stream raft group handling).
/// </summary>
public sealed class StreamReplicaGroup
{
    private readonly List<RaftNode> _nodes;

    // B10: Message tracking for stream-specific RAFT apply logic.
    // Go reference: jetstream_cluster.go processStreamMsg — message count and sequence tracking.
    private long _messageCount;
    private long _lastSequence;

    /// <summary>Name of the stream this group replicates.</summary>
    public string StreamName { get; }

    /// <summary>Read-only view over the nodes that currently make up the group.</summary>
    public IReadOnlyList<RaftNode> Nodes => _nodes;

    /// <summary>
    /// The node most recently put through an election by this group. It is replaced by the
    /// constructors, <see cref="ProposeAsync"/>, <see cref="StepDownAsync"/> and
    /// <see cref="ApplyPlacementAsync"/>.
    /// </summary>
    public RaftNode Leader { get; private set; }

    /// <summary>
    /// Number of messages applied to the local store simulation.
    /// Go reference: stream.go state.Msgs.
    /// </summary>
    public long MessageCount => Interlocked.Read(ref _messageCount);

    /// <summary>
    /// Last sequence number assigned to an applied message.
    /// Go reference: stream.go state.LastSeq.
    /// </summary>
    public long LastSequence => Interlocked.Read(ref _lastSequence);

    /// <summary>
    /// Raised by <see cref="StepDownAsync"/> after a replacement leader has been elected.
    /// The other paths that reassign <see cref="Leader"/> do not raise it.
    /// Go reference: jetstream_cluster.go leader change notification.
    /// </summary>
    public event EventHandler<LeaderChangedEventArgs>? LeaderChanged;

    /// <summary>
    /// The stream assignment that was used to construct this group, if created from a
    /// StreamAssignment. Null when constructed via the (string, int) overload.
    /// Go reference: jetstream_cluster.go:166-184 streamAssignment struct.
    /// </summary>
    public StreamAssignment? Assignment { get; private set; }

    // B10: Commit/processed index passthroughs to the leader node.
    // Go reference: raft.go:150-160 (applied/processed fields).

    /// <summary>The highest log index committed to quorum on the leader.</summary>
    public long CommitIndex => Leader.CommitIndex;

    /// <summary>The highest log index applied to the state machine on the leader.</summary>
    public long ProcessedIndex => Leader.ProcessedIndex;

    /// <summary>Number of committed entries awaiting state-machine application.</summary>
    public int PendingCommits => Leader.CommitQueue.Count;

    /// <summary>
    /// Creates a group of <paramref name="replicas"/> nodes (at least one) whose ids are
    /// <c>{lower-cased stream name}-r{1..n}</c>, then elects the first node as leader.
    /// </summary>
    /// <param name="streamName">Name of the stream; used to derive the node ids.</param>
    /// <param name="replicas">Requested replica count; values below 1 are treated as 1.</param>
    /// <exception cref="ArgumentNullException"><paramref name="streamName"/> is null.</exception>
    public StreamReplicaGroup(string streamName, int replicas)
    {
        // Fail fast instead of surfacing a NullReferenceException from inside the LINQ lambda below.
        ArgumentNullException.ThrowIfNull(streamName);

        StreamName = streamName;

        var nodeCount = Math.Max(replicas, 1);
        _nodes = Enumerable.Range(1, nodeCount)
            .Select(i => new RaftNode($"{StreamNamePrefix()}-r{i}"))
            .ToList();

        Leader = BootstrapCluster();
    }

    /// <summary>
    /// Creates a StreamReplicaGroup from a StreamAssignment, naming each RaftNode after the
    /// peers listed in the assignment's RaftGroup.
    /// Go reference: jetstream_cluster.go processStreamAssignment — creates a per-stream
    /// raft group from the assignment's group peers.
    /// </summary>
    /// <param name="assignment">Assignment supplying the stream name and the peer ids.</param>
    /// <exception cref="ArgumentNullException"><paramref name="assignment"/> is null.</exception>
    public StreamReplicaGroup(StreamAssignment assignment)
    {
        ArgumentNullException.ThrowIfNull(assignment);

        Assignment = assignment;
        StreamName = assignment.StreamName;

        var peers = assignment.Group.Peers;
        if (peers.Count == 0)
        {
            // Fall back to a single-node group when no peers are listed.
            _nodes = [new RaftNode($"{StreamNamePrefix()}-r1")];
        }
        else
        {
            _nodes = peers
                .Select(peerId => new RaftNode(peerId))
                .ToList();
        }

        Leader = BootstrapCluster();
    }

    /// <summary>
    /// Proposes a raw command to the group. If the recorded leader no longer reports
    /// <c>IsLeader</c>, the next node in ring order is put through an election first;
    /// <see cref="LeaderChanged"/> is not raised on this fail-over path.
    /// </summary>
    /// <param name="command">Command text forwarded to the leader's ProposeAsync.</param>
    /// <param name="ct">Cancellation token forwarded to the leader.</param>
    /// <returns>The value returned by the leader's ProposeAsync.</returns>
    public async ValueTask<long> ProposeAsync(string command, CancellationToken ct)
    {
        if (!Leader.IsLeader)
        {
            Leader = ElectLeader(SelectNextCandidate(Leader));
        }

        return await Leader.ProposeAsync(command, ct);
    }

    /// <summary>
    /// Proposes a message for storage to the stream's RAFT group.
    /// The log entry command carries the subject plus the header and payload lengths
    /// (<c>MSG {subject} {headers.Length} {payload.Length}</c>); the bytes themselves are
    /// not embedded in the command.
    /// Go reference: jetstream_cluster.go processStreamMsg.
    /// </summary>
    /// <param name="subject">Subject written into the command.</param>
    /// <param name="headers">Message headers; only the length is encoded.</param>
    /// <param name="payload">Message payload; only the length is encoded.</param>
    /// <param name="ct">Cancellation token forwarded to the leader.</param>
    /// <returns>The index returned by the leader's ProposeAsync.</returns>
    /// <exception cref="InvalidOperationException">The recorded leader does not report IsLeader.</exception>
    public async ValueTask<long> ProposeMessageAsync(
        string subject, ReadOnlyMemory<byte> headers, ReadOnlyMemory<byte> payload, CancellationToken ct)
    {
        // Read Leader once so the leadership check and the proposal target the same node.
        var leader = Leader;
        if (!leader.IsLeader)
        {
            throw new InvalidOperationException("Only the stream RAFT leader can propose messages.");
        }

        // Encode as a MSG command for the RAFT log.
        var command = $"MSG {subject} {headers.Length} {payload.Length}";
        var index = await leader.ProposeAsync(command, ct);

        // Apply the message locally.
        ApplyMessage(index);

        return index;
    }

    /// <summary>
    /// Asks the current leader to step down, elects the next node in ring order and raises
    /// <see cref="LeaderChanged"/> with the previous id, the new id and the new leader's term.
    /// Completes synchronously.
    /// </summary>
    /// <param name="ct">Accepted for signature symmetry; not observed.</param>
    public Task StepDownAsync(CancellationToken ct)
    {
        _ = ct;

        var previous = Leader;
        previous.RequestStepDown();
        Leader = ElectLeader(SelectNextCandidate(previous));
        LeaderChanged?.Invoke(this, new LeaderChangedEventArgs(previous.Id, Leader.Id, Leader.Term));

        return Task.CompletedTask;
    }

    /// <summary>
    /// Returns the current status of the stream replica group.
    /// Go reference: jetstream_cluster.go stream replica status.
    /// </summary>
    public StreamReplicaStatus GetStatus()
    {
        // Capture the leader once so every leader-derived field describes the same node.
        var leader = Leader;

        return new StreamReplicaStatus
        {
            StreamName = StreamName,
            LeaderId = leader.Id,
            LeaderTerm = leader.Term,
            MessageCount = MessageCount,
            LastSequence = LastSequence,
            ReplicaCount = _nodes.Count,
            CommitIndex = leader.CommitIndex,
            AppliedIndex = leader.AppliedIndex,
        };
    }

    /// <summary>
    /// Resizes the group to <c>placement.Count</c> nodes (minimum one). Only the element
    /// count is used; the placement values are not read. Nothing happens when the size already
    /// matches. Otherwise nodes are appended with <c>{lower-cased stream name}-r{i}</c> ids or
    /// trimmed from the tail, every remaining node is reconfigured and the first node is
    /// elected. <see cref="LeaderChanged"/> is not raised. Completes synchronously.
    /// </summary>
    /// <param name="placement">Target placement; its count is the desired group size.</param>
    /// <param name="ct">Accepted for signature symmetry; not observed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="placement"/> is null.</exception>
    public Task ApplyPlacementAsync(IReadOnlyList<int> placement, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(placement);
        _ = ct;

        var targetCount = Math.Max(placement.Count, 1);
        if (targetCount == _nodes.Count)
        {
            return Task.CompletedTask;
        }

        if (targetCount > _nodes.Count)
        {
            for (var i = _nodes.Count + 1; i <= targetCount; i++)
            {
                _nodes.Add(new RaftNode($"{StreamNamePrefix()}-r{i}"));
            }
        }
        else
        {
            _nodes.RemoveRange(targetCount, _nodes.Count - targetCount);
        }

        Leader = BootstrapCluster();
        return Task.CompletedTask;
    }

    // B10: Per-stream RAFT apply logic
    // Go reference: jetstream_cluster.go processStreamEntries / processStreamMsg

    /// <summary>
    /// Dequeues all currently pending committed entries from the leader's CommitQueue and
    /// processes each one:
    ///   "+peer:&lt;id&gt;"  — adds the peer via ProposeAddPeerAsync
    ///   "-peer:&lt;id&gt;"  — removes the peer via ProposeRemovePeerAsync
    ///   anything else — marks the entry as processed via MarkProcessed
    /// Go reference: jetstream_cluster.go:processStreamEntries (apply loop).
    /// </summary>
    /// <param name="ct">Cancellation token forwarded to the peer add/remove proposals.</param>
    public async Task ApplyCommittedEntriesAsync(CancellationToken ct)
    {
        const string addPeerPrefix = "+peer:";
        const string removePeerPrefix = "-peer:";

        // Pin the leader for the whole drain so an entry is always handled by the node
        // whose queue it was taken from.
        var leader = Leader;

        while (leader.CommitQueue.TryDequeue(out var entry))
        {
            if (entry is null)
            {
                continue;
            }

            if (entry.Command.StartsWith(addPeerPrefix, StringComparison.Ordinal))
            {
                var peerId = entry.Command[addPeerPrefix.Length..];
                await leader.ProposeAddPeerAsync(peerId, ct);
            }
            else if (entry.Command.StartsWith(removePeerPrefix, StringComparison.Ordinal))
            {
                var peerId = entry.Command[removePeerPrefix.Length..];
                await leader.ProposeRemovePeerAsync(peerId, ct);
            }
            else
            {
                leader.MarkProcessed(entry.Index);
            }
        }
    }

    /// <summary>
    /// Asks the current leader for a snapshot checkpoint by delegating to its
    /// CreateSnapshotCheckpointAsync.
    /// Go reference: raft.go CreateSnapshotCheckpoint.
    /// </summary>
    public Task<RaftSnapshot> CheckpointAsync(CancellationToken ct)
        => Leader.CreateSnapshotCheckpointAsync(ct);

    /// <summary>
    /// Restores the current leader from a previously created snapshot by delegating to its
    /// DrainAndReplaySnapshotAsync.
    /// Go reference: raft.go DrainAndReplaySnapshot.
    /// </summary>
    public Task RestoreFromSnapshotAsync(RaftSnapshot snapshot, CancellationToken ct)
        => Leader.DrainAndReplaySnapshotAsync(snapshot, ct);

    /// <summary>
    /// Hands every node the full membership list and elects the first node.
    /// Shared by both constructors and by <see cref="ApplyPlacementAsync"/>.
    /// </summary>
    private RaftNode BootstrapCluster()
    {
        foreach (var node in _nodes)
        {
            node.ConfigureCluster(_nodes);
        }

        return ElectLeader(_nodes[0]);
    }

    /// <summary>
    /// Picks the node after <paramref name="currentLeader"/> in list order, wrapping around.
    /// Returns the first node when the group has a single member or the current leader is
    /// no longer in the list.
    /// </summary>
    private RaftNode SelectNextCandidate(RaftNode currentLeader)
    {
        if (_nodes.Count == 1)
        {
            return _nodes[0];
        }

        var index = _nodes.FindIndex(n => n.Id == currentLeader.Id);
        if (index < 0)
        {
            return _nodes[0];
        }

        return _nodes[(index + 1) % _nodes.Count];
    }

    /// <summary>
    /// Starts an election on <paramref name="candidate"/> and feeds it the vote response of
    /// every other node. The candidate is returned regardless of the election outcome.
    /// </summary>
    private RaftNode ElectLeader(RaftNode candidate)
    {
        candidate.StartElection(_nodes.Count);

        foreach (var voter in _nodes.Where(n => n.Id != candidate.Id))
        {
            candidate.ReceiveVote(voter.GrantVote(candidate.Term), _nodes.Count);
        }

        return candidate;
    }

    /// <summary>
    /// Applies a committed message entry: bumps the message count and raises the last
    /// sequence to <paramref name="index"/> if it is higher than the current value.
    /// Go reference: jetstream_cluster.go processStreamMsg apply.
    /// </summary>
    private void ApplyMessage(long index)
    {
        Interlocked.Increment(ref _messageCount);

        // The RAFT index doubles as the sequence. The compare-and-swap loop keeps
        // _lastSequence from ever moving backwards when applies race.
        long current;
        long desired;
        do
        {
            current = Interlocked.Read(ref _lastSequence);
            desired = Math.Max(current, index);
        }
        while (Interlocked.CompareExchange(ref _lastSequence, desired, current) != current);
    }

    /// <summary>Lower-cased stream name used as the prefix of generated node ids.</summary>
    private string StreamNamePrefix() => StreamName.ToLowerInvariant();
}
|
|
|
|
/// <summary>
/// Immutable point-in-time report describing a stream replica group, as produced by
/// StreamReplicaGroup.GetStatus.
/// Go reference: jetstream_cluster.go stream replica status report.
/// </summary>
public sealed class StreamReplicaStatus
{
    /// <summary>Name of the stream the report belongs to. Defaults to the empty string.</summary>
    public string StreamName { get; init; } = "";

    /// <summary>Id of the node regarded as leader when the report was taken. Defaults to the empty string.</summary>
    public string LeaderId { get; init; } = "";

    /// <summary>Term reported by the leader node.</summary>
    public int LeaderTerm { get; init; }

    /// <summary>Message count of the group at report time.</summary>
    public long MessageCount { get; init; }

    /// <summary>Last sequence of the group at report time.</summary>
    public long LastSequence { get; init; }

    /// <summary>Number of nodes in the group.</summary>
    public int ReplicaCount { get; init; }

    /// <summary>Commit index reported by the leader node.</summary>
    public long CommitIndex { get; init; }

    /// <summary>Applied index reported by the leader node.</summary>
    public long AppliedIndex { get; init; }
}
|
|
|
|
/// <summary>
/// Payload of a leader change notification: who led before, who leads now, and the
/// new leader's term.
/// </summary>
public sealed class LeaderChangedEventArgs : EventArgs
{
    /// <summary>Captures the details of a single leadership hand-over.</summary>
    /// <param name="previousLeaderId">Id of the node that held leadership before the change.</param>
    /// <param name="newLeaderId">Id of the node that holds leadership after the change.</param>
    /// <param name="newTerm">Term associated with the new leader.</param>
    public LeaderChangedEventArgs(string previousLeaderId, string newLeaderId, int newTerm)
    {
        PreviousLeaderId = previousLeaderId;
        NewLeaderId = newLeaderId;
        NewTerm = newTerm;
    }

    /// <summary>Id of the node that held leadership before the change.</summary>
    public string PreviousLeaderId { get; }

    /// <summary>Id of the node that holds leadership after the change.</summary>
    public string NewLeaderId { get; }

    /// <summary>Term associated with the new leader.</summary>
    public int NewTerm { get; }
}
|