fix(communication): resolve Communication-004..008 — Resume supervision, gRPC option wiring, address-load logging, sync dispose, flap detection
This commit is contained in:
@@ -22,6 +22,9 @@ public class DebugStreamBridgeActorTests : TestKit
|
||||
{
|
||||
// Use a very short reconnect delay for testing
|
||||
DebugStreamBridgeActor.ReconnectDelay = TimeSpan.FromMilliseconds(100);
|
||||
// Long stability window so streams are never considered "stable" mid-test
|
||||
// unless a test deliberately waits it out.
|
||||
DebugStreamBridgeActor.StabilityWindow = TimeSpan.FromSeconds(30);
|
||||
}
|
||||
|
||||
private record TestContext(
|
||||
@@ -264,8 +267,13 @@ public class DebugStreamBridgeActorTests : TestKit
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Grpc_Error_Resets_RetryCount_On_Successful_Event()
|
||||
public void FlappingStream_DeliveringEventsBetweenFailures_StillTerminatesAfterMaxRetries()
|
||||
{
|
||||
// Communication-008 regression: a stream that connects, delivers an event,
|
||||
// then fails — repeatedly — must still trip MaxRetries. The retry count is
|
||||
// NO LONGER reset by a received event (only by the stability window). The
|
||||
// previous behaviour reset _retryCount on every event, so a flapping site
|
||||
// reconnected forever and the debug session lived on indefinitely.
|
||||
var ctx = CreateBridgeActor();
|
||||
ctx.CommProbe.ExpectMsg<SiteEnvelope>();
|
||||
|
||||
@@ -275,30 +283,72 @@ public class DebugStreamBridgeActorTests : TestKit
|
||||
new List<AlarmStateChanged>(),
|
||||
DateTimeOffset.UtcNow);
|
||||
|
||||
Watch(ctx.BridgeActor);
|
||||
ctx.BridgeActor.Tell(snapshot);
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 1, TimeSpan.FromSeconds(3));
|
||||
|
||||
// First error → retry 1
|
||||
ctx.MockGrpcClient.SubscribeCalls[0].OnError(new Exception("Error 1"));
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 2, TimeSpan.FromSeconds(5));
|
||||
|
||||
// Simulate successful event (resets retry count)
|
||||
var attrChange = new AttributeValueChanged(InstanceName, "IO", "Temp", 42.5, "Good", DateTimeOffset.UtcNow);
|
||||
ctx.MockGrpcClient.SubscribeCalls[1].OnEvent(attrChange);
|
||||
AwaitCondition(() => { lock (ctx.ReceivedEvents) { return ctx.ReceivedEvents.Count == 2; } },
|
||||
TimeSpan.FromSeconds(3));
|
||||
|
||||
// Now another 3 errors should be tolerated (retry count was reset)
|
||||
ctx.MockGrpcClient.SubscribeCalls[1].OnError(new Exception("Error 2"));
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 3, TimeSpan.FromSeconds(5));
|
||||
// Flap: deliver one event then fail, three times. Each event would, under
|
||||
// the old buggy logic, reset the retry budget and prevent termination.
|
||||
for (var i = 0; i < 3; i++)
|
||||
{
|
||||
var call = ctx.MockGrpcClient.SubscribeCalls[i];
|
||||
call.OnEvent(attrChange);
|
||||
call.OnError(new Exception($"Flap {i + 1}"));
|
||||
var expected = i + 2;
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == expected, TimeSpan.FromSeconds(5));
|
||||
}
|
||||
|
||||
ctx.MockGrpcClient.SubscribeCalls[2].OnError(new Exception("Error 3"));
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 4, TimeSpan.FromSeconds(5));
|
||||
// Fourth error (after the 3 retries) must exceed MaxRetries and terminate.
|
||||
ctx.MockGrpcClient.SubscribeCalls[3].OnEvent(attrChange);
|
||||
ctx.MockGrpcClient.SubscribeCalls[3].OnError(new Exception("Flap 4"));
|
||||
|
||||
ctx.MockGrpcClient.SubscribeCalls[3].OnError(new Exception("Error 4"));
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 5, TimeSpan.FromSeconds(5));
|
||||
ExpectTerminated(ctx.BridgeActor, TimeSpan.FromSeconds(5));
|
||||
Assert.True(ctx.TerminatedFlag[0]);
|
||||
}
|
||||
|
||||
// Still alive — 3 retries from the second failure point succeeded
|
||||
[Fact]
|
||||
public void RetryCount_RecoveredOnlyAfterStreamStaysStableForStabilityWindow()
|
||||
{
|
||||
// Communication-008: after a stream has been connected for the stability
|
||||
// window, the retry budget is recovered — a later transient fault then gets
|
||||
// a fresh set of retries rather than being counted against the old budget.
|
||||
DebugStreamBridgeActor.StabilityWindow = TimeSpan.FromMilliseconds(300);
|
||||
try
|
||||
{
|
||||
var ctx = CreateBridgeActor();
|
||||
ctx.CommProbe.ExpectMsg<SiteEnvelope>();
|
||||
|
||||
var snapshot = new DebugViewSnapshot(
|
||||
InstanceName,
|
||||
new List<AttributeValueChanged>(),
|
||||
new List<AlarmStateChanged>(),
|
||||
DateTimeOffset.UtcNow);
|
||||
|
||||
Watch(ctx.BridgeActor);
|
||||
ctx.BridgeActor.Tell(snapshot);
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == 1, TimeSpan.FromSeconds(3));
|
||||
|
||||
// Five failures — but each new stream stays up long enough (the mock
|
||||
// stream only completes on cancel) for the stability window to elapse
|
||||
// and reset the retry budget before the next failure.
|
||||
for (var i = 0; i < 5; i++)
|
||||
{
|
||||
Thread.Sleep(450); // exceed the 300ms stability window
|
||||
ctx.MockGrpcClient.SubscribeCalls[i].OnError(new Exception($"Error {i + 1}"));
|
||||
var expected = i + 2;
|
||||
AwaitCondition(() => ctx.MockGrpcClient.SubscribeCalls.Count == expected, TimeSpan.FromSeconds(5));
|
||||
}
|
||||
|
||||
// Five well-spaced failures did NOT terminate the actor because each
|
||||
// reconnect recovered its retry budget after the stability window.
|
||||
Assert.False(ctx.TerminatedFlag[0]);
|
||||
}
|
||||
finally
|
||||
{
|
||||
DebugStreamBridgeActor.StabilityWindow = TimeSpan.FromSeconds(30);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user