6ae0fea558
Async cancellation hygiene, fire-and-forget observability, retry/shutdown semantics, and audit-row coverage across 9 modules.

Highlights:

Cancellation & lifecycle:
- AuditLog-006: SqliteAuditWriter.Dispose hops to thread pool, escaping the captured SyncContext that risked sync-over-async deadlock.
- AuditLog-010: SiteAuditTelemetryActor owns a private lifecycle CTS, threaded through drain paths instead of CancellationToken.None.
- Comm-019: CentralCommunicationActor adds lifecycle CTS for repo calls.
- Host-019: Migration StartupRetry forwards ApplicationStopping so SIGTERM during the bounded-retry window aborts cleanly.

Cursor / retry / counter correctness:
- AuditLog-004: SiteAuditReconciliationActor's cursor now holds at `since` when any row's idempotent insert is still being retried (per-EventId retry counter, MaxPermanentInsertAttempts=5 escape valve with LogCritical abandon). No more silent abandonment of permanently-failing rows.
- ConfigDB-019: Dropped the catch-and-continue on EnsureLookaheadAsync's SPLIT loop — by class-doc construction the catch could only mask real failures and let the next iteration create permanent partition holes.
- HM-017/018: HealthReportSender + CentralHealthReportLoop snapshot per-interval counters before sending, restore via new ISiteHealthCollector.AddIntervalCounters on transport failure so counts aren't silently lost.

Fire-and-forget / shutdown waits:
- InboundAPI-018: AuditWriteMiddleware observes faulted audit-write tasks via OnlyOnFaulted continuation (Warning log; response unchanged).
- SnF-024: StoreAndForwardService.StopAsync awaits in-flight retry sweep with a bounded SweepShutdownWaitTimeout (10s).

Leak / refactor:
- Comm-021: SiteStreamGrpcServer.SubscribeInstance wraps Subscribe in its own try/catch so a throw doesn't leak the relay actor or _activeStreams entry.
- Comm-022: VERIFIED already-closed by Comm-016's dead-code purge.
- CLI-017: BundleCommands' three subcommands delegate to ExecuteCommandAsync (auth-failure exit-code contract unified).

Defensive / validation:
- CLI-021: CliConfig.Load wraps file-read/JSON parse so malformed config prints a warning and returns defaults instead of crashing the CLI.
- Host-022: ParseLevel emits stderr one-shot warning for unrecognised MinimumLevel instead of silently coercing to Information.
- ESG-019: ExternalSystemClient sets HttpClient.Timeout=Infinite so the per-call CTS is the sole timeout source (was clipped to 100s by .NET).
- Security-020: New SecurityOptionsValidator (IValidateOptions) rejects empty LdapServer/LdapSearchBase with ValidateOnStart.
- DM-019: Lifecycle command timeouts now emit DisableTimedOut/EnableTimedOut/DeleteTimedOut audit entries (mirrors DeployFailed pattern).

Plus reconciled stale per-module Open-findings counters that had drifted from prior sessions.

20+ new regression tests across 11 test projects; build clean; affected suites all green. README regenerated: 75 open (was 93).
373 lines
14 KiB
C#
373 lines
14 KiB
C#
using System.Threading.Channels;
|
|
using Akka.Actor;
|
|
using Akka.TestKit.Xunit2;
|
|
using Grpc.Core;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using NSubstitute;
|
|
using ScadaLink.Communication.Grpc;
|
|
|
|
namespace ScadaLink.Communication.Tests.Grpc;
|
|
|
|
/// <summary>
/// Tests for <see cref="SiteStreamGrpcServer"/>: readiness and capacity gating,
/// duplicate correlation-id replacement, cleanup on cancellation and shutdown,
/// correlation-id validation, event relaying to the response stream, and cleanup
/// when the subscriber throws. The Akka <c>Sys</c> actor system comes from
/// <see cref="TestKit"/>; the stream subscriber is an NSubstitute fake.
/// </summary>
public class SiteStreamGrpcServerTests : TestKit
{
    /// <summary>
    /// Upper bound on how long a test waits for a stream task to finish after it
    /// has been cancelled. Without a bound, a regression in the server's
    /// cancellation path would hang the whole test run instead of failing one test.
    /// </summary>
    private static readonly TimeSpan StreamCompletionTimeout = TimeSpan.FromSeconds(10);

    private readonly ISiteStreamSubscriber _subscriber;
    private readonly ILogger<SiteStreamGrpcServer> _logger;

    /// <summary>
    /// Creates the shared fakes: a subscriber whose <c>Subscribe</c> returns
    /// <c>"sub-1"</c> for any arguments, and a no-op logger.
    /// </summary>
    public SiteStreamGrpcServerTests()
    {
        _subscriber = Substitute.For<ISiteStreamSubscriber>();
        _subscriber.Subscribe(Arg.Any<string>(), Arg.Any<IActorRef>())
            .Returns("sub-1");
        _logger = NullLogger<SiteStreamGrpcServer>.Instance;
    }

    /// <summary>Builds a server wired to the shared subscriber fake and logger.</summary>
    /// <param name="maxStreams">Value passed as the server's stream limit.</param>
    private SiteStreamGrpcServer CreateServer(int maxStreams = 100)
    {
        return new SiteStreamGrpcServer(_subscriber, _logger, maxStreams);
    }

    /// <summary>Builds a subscription request for the given correlation id and instance name.</summary>
    private static InstanceStreamRequest MakeRequest(string correlationId = "corr-1", string instance = "Site1.Pump01")
    {
        return new InstanceStreamRequest
        {
            CorrelationId = correlationId,
            InstanceUniqueName = instance
        };
    }

    [Fact]
    public async Task RejectsWhenNotReady()
    {
        // SetReady() is deliberately NOT called on this server.
        var server = CreateServer();

        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();
        var context = CreateMockContext();

        var ex = await Assert.ThrowsAsync<RpcException>(
            () => server.SubscribeInstance(MakeRequest(), writer, context));

        Assert.Equal(StatusCode.Unavailable, ex.StatusCode);
    }

    [Fact]
    public async Task RejectsWhenMaxStreamsReached()
    {
        var server = CreateServer(maxStreams: 1);
        server.SetReady(Sys);

        // Occupy the single available slot with a stream that stays open until cancelled.
        using var cts1 = new CancellationTokenSource();
        var context1 = CreateMockContext(cts1.Token);
        var writer1 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var stream1Task = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-1"), writer1, context1));

        // Wait for the first stream to register.
        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        // A second stream must be rejected.
        var writer2 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();
        var context2 = CreateMockContext();

        var ex = await Assert.ThrowsAsync<RpcException>(
            () => server.SubscribeInstance(MakeRequest("corr-2"), writer2, context2));

        Assert.Equal(StatusCode.ResourceExhausted, ex.StatusCode);

        // Clean up the first stream; the wait is bounded so a stuck stream fails the test.
        cts1.Cancel();
        await stream1Task.WaitAsync(StreamCompletionTimeout);
    }

    [Fact]
    public async Task CancelsDuplicateCorrelationId()
    {
        var server = CreateServer();
        server.SetReady(Sys);

        using var cts1 = new CancellationTokenSource();
        var context1 = CreateMockContext(cts1.Token);
        var writer1 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        // Start the first stream.
        var stream1Task = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-dup"), writer1, context1));

        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        // Start a second stream with the same correlation id -- it should cancel the first.
        using var cts2 = new CancellationTokenSource();
        var context2 = CreateMockContext(cts2.Token);
        var writer2 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var stream2Task = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-dup"), writer2, context2));

        // The first stream should complete (cancelled by the duplicate replacement).
        // cts1 is never cancelled here, so only the replacement can end it.
        await stream1Task.WaitAsync(StreamCompletionTimeout);

        // The second stream should be the one that is active.
        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        // Clean up.
        cts2.Cancel();
        await stream2Task.WaitAsync(StreamCompletionTimeout);
    }

    [Fact]
    public async Task CleansUpOnCancellation()
    {
        var server = CreateServer();
        server.SetReady(Sys);

        using var cts = new CancellationTokenSource();
        var context = CreateMockContext(cts.Token);
        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var streamTask = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-cleanup"), writer, context));

        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        cts.Cancel();
        await streamTask.WaitAsync(StreamCompletionTimeout);

        Assert.Equal(0, server.ActiveStreamCount);
    }

    // --- Host-017 / REQ-HOST-7: site-shutdown ordering ---

    [Fact]
    public async Task Host017_CancelAllStreams_CancelsActiveStreamsAndRefusesNewOnes()
    {
        // REQ-HOST-7 step (1)+(2): on CoordinatedShutdown the gRPC server must
        // stop accepting new streams AND cancel every active stream so the
        // client observes a clean Cancelled (not a silent stream that only
        // times out via keepalive). Program.cs registers
        // ApplicationStopping → CancelAllStreams(); this test exercises the
        // server-side guarantee in isolation.
        var server = CreateServer();
        server.SetReady(Sys);

        using var cts1 = new CancellationTokenSource();
        var context1 = CreateMockContext(cts1.Token);
        var writer1 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var stream1Task = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-shutdown-1"), writer1, context1));

        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        // Begin shutdown — flip the flag AND cancel the active stream.
        server.CancelAllStreams();

        Assert.True(server.IsShuttingDown);

        // cts1 is never cancelled, so the stream can only end because
        // CancelAllStreams cancelled it; its entry must then be gone.
        await stream1Task.WaitAsync(StreamCompletionTimeout);
        Assert.Equal(0, server.ActiveStreamCount);

        // A second SubscribeInstance after shutdown is refused immediately
        // with Unavailable rather than allowed to register a new stream.
        var writer2 = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();
        var context2 = CreateMockContext();
        var ex = await Assert.ThrowsAsync<RpcException>(
            () => server.SubscribeInstance(MakeRequest("corr-shutdown-2"), writer2, context2));
        Assert.Equal(StatusCode.Unavailable, ex.StatusCode);
        Assert.Contains("shutting", ex.Status.Detail, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    public void Host017_CancelAllStreams_IsIdempotent()
    {
        // Repeated calls during a double-fire shutdown sequence must not throw.
        var server = CreateServer();
        server.SetReady(Sys);

        server.CancelAllStreams();
        server.CancelAllStreams();

        Assert.True(server.IsShuttingDown);
        Assert.Equal(0, server.ActiveStreamCount);
    }

    [Fact]
    public async Task SubscribesAndRemovesFromStreamManager()
    {
        var server = CreateServer();
        server.SetReady(Sys);

        using var cts = new CancellationTokenSource();
        var context = CreateMockContext(cts.Token);
        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var streamTask = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-sub", "Site1.Motor01"), writer, context));

        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        // Verify Subscribe was called for the requested instance.
        _subscriber.Received(1).Subscribe("Site1.Motor01", Arg.Any<IActorRef>());

        cts.Cancel();
        await streamTask.WaitAsync(StreamCompletionTimeout);

        // Verify RemoveSubscriber was called once the stream ended.
        _subscriber.Received(1).RemoveSubscriber(Arg.Any<IActorRef>());
    }

    [Fact]
    public async Task WritesEventsToResponseStream()
    {
        var server = CreateServer();
        server.SetReady(Sys);

        // Capture the relay actor handed to Subscribe so the test can send it events.
        // A TaskCompletionSource publishes the reference safely across threads
        // (Subscribe runs on the server's thread) and replaces polling a shared local.
        var relayCaptured = new TaskCompletionSource<IActorRef>(
            TaskCreationOptions.RunContinuationsAsynchronously);
        _subscriber.Subscribe(Arg.Any<string>(), Arg.Any<IActorRef>())
            .Returns(ci =>
            {
                relayCaptured.TrySetResult(ci.Arg<IActorRef>());
                return "sub-write";
            });

        using var cts = new CancellationTokenSource();
        var context = CreateMockContext(cts.Token);
        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        // The write callback runs on the server's thread while this test reads the
        // list, so every access to writtenEvents goes through the same lock.
        var writtenGate = new object();
        var writtenEvents = new List<SiteStreamEvent>();
        writer.WriteAsync(Arg.Any<SiteStreamEvent>(), Arg.Any<CancellationToken>())
            .Returns(Task.CompletedTask)
            .AndDoes(ci =>
            {
                lock (writtenGate)
                {
                    writtenEvents.Add(ci.Arg<SiteStreamEvent>());
                }
            });

        var streamTask = Task.Run(() => server.SubscribeInstance(
            MakeRequest("corr-write", "Site1.Pump01"), writer, context));

        var relayActor = await relayCaptured.Task.WaitAsync(StreamCompletionTimeout);

        // Send a domain event to the relay actor.
        var ts = DateTimeOffset.UtcNow;
        relayActor.Tell(new Commons.Messages.Streaming.AttributeValueChanged(
            "Site1.Pump01", "Path", "Attr", 99.5, "Good", ts));

        // Wait for the event to be written to the response stream.
        await WaitForConditionAsync(() =>
        {
            lock (writtenGate)
            {
                return writtenEvents.Count >= 1;
            }
        });

        SiteStreamEvent written;
        lock (writtenGate)
        {
            written = Assert.Single(writtenEvents);
        }

        Assert.Equal("corr-write", written.CorrelationId);
        Assert.Equal(SiteStreamEvent.EventOneofCase.AttributeChanged, written.EventCase);

        cts.Cancel();
        await streamTask.WaitAsync(StreamCompletionTimeout);
    }

    [Theory]
    [InlineData("corr/with/slash")]
    [InlineData("corr with space")]
    [InlineData("")]
    [InlineData("$weird")]
    public async Task RejectsCorrelationIdThatIsNotActorNameSafe(string badCorrelationId)
    {
        // Communication-014 regression: a public gRPC SubscribeInstance must not feed
        // an untrusted correlation_id straight into an Akka actor name. An unsafe id
        // must be rejected cleanly with InvalidArgument rather than escaping as an
        // unhandled InvalidActorNameException.
        var server = CreateServer();
        server.SetReady(Sys);

        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();
        var context = CreateMockContext();

        var ex = await Assert.ThrowsAsync<RpcException>(
            () => server.SubscribeInstance(MakeRequest(badCorrelationId), writer, context));

        Assert.Equal(StatusCode.InvalidArgument, ex.StatusCode);
        Assert.Equal(0, server.ActiveStreamCount);
    }

    [Fact]
    public async Task AcceptsActorNameSafeCorrelationId()
    {
        // A normal GUID-style correlation id (what central always supplies) is accepted.
        var server = CreateServer();
        server.SetReady(Sys);

        using var cts = new CancellationTokenSource();
        var context = CreateMockContext(cts.Token);
        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();

        var streamTask = Task.Run(() => server.SubscribeInstance(
            MakeRequest(Guid.NewGuid().ToString()), writer, context));

        await WaitForConditionAsync(() => server.ActiveStreamCount == 1);

        cts.Cancel();
        await streamTask.WaitAsync(StreamCompletionTimeout);
    }

    [Fact]
    public async Task Comm021_SubscribeThrows_StopsRelayActorAndRemovesActiveStreamEntry()
    {
        // Communication-021 regression: SubscribeInstance creates a StreamRelayActor
        // and registers an _activeStreams entry BEFORE calling _streamSubscriber.Subscribe.
        // If Subscribe throws (e.g. stale instance, site runtime shutting down) and the
        // pre-fix code lets the throw escape without the wrapping try, the relay actor
        // and the activeStreams entry both leak. The fix wraps the Subscribe call so the
        // catch deterministically stops the actor and removes the entry before re-throw.
        IActorRef? relayActor = null;
        var subscriber = Substitute.For<ISiteStreamSubscriber>();
        subscriber.Subscribe(Arg.Any<string>(), Arg.Any<IActorRef>())
            .Returns<string>(ci =>
            {
                // Remember the relay actor the server created so its termination
                // can be asserted below, then fail the subscription.
                relayActor = ci.Arg<IActorRef>();
                throw new InvalidOperationException("instance not found");
            });

        var server = new SiteStreamGrpcServer(subscriber, _logger);
        server.SetReady(Sys);

        var writer = Substitute.For<IServerStreamWriter<SiteStreamEvent>>();
        var context = CreateMockContext();

        // The InvalidOperationException is expected to propagate (the gRPC stack maps
        // unhandled throws to Internal); the load-bearing assertions are the cleanup.
        await Assert.ThrowsAsync<InvalidOperationException>(
            () => server.SubscribeInstance(MakeRequest("corr-comm021"), writer, context));

        // _activeStreams entry was inserted before Subscribe was called; the catch
        // must remove it so a follow-up subscription with the same correlation id is
        // not blocked.
        Assert.Equal(0, server.ActiveStreamCount);

        // The relay actor must be stopped so it does not leak. Watching an actor that
        // has already terminated still delivers Terminated, so this does not race
        // with the server's cleanup.
        Assert.NotNull(relayActor);
        Watch(relayActor!);
        ExpectTerminated(relayActor!);

        // RemoveSubscriber must NOT have been called (Subscribe never returned a
        // subscription id) — verifying we hit the catch path, not the finally path.
        subscriber.DidNotReceive().RemoveSubscriber(Arg.Any<IActorRef>());
    }

    [Fact]
    public void SetReady_AllowsStreamCreation()
    {
        // Smoke test only: SetReady must not throw and must not register any stream
        // by itself. That SetReady actually enables streaming is covered by the
        // other tests, which all subscribe after calling it.
        var server = CreateServer();

        server.SetReady(Sys);

        Assert.Equal(0, server.ActiveStreamCount);
    }

    /// <summary>
    /// Creates a substitute <see cref="ServerCallContext"/> whose
    /// <c>CancellationToken</c> returns <paramref name="cancellationToken"/>.
    /// </summary>
    private static ServerCallContext CreateMockContext(CancellationToken cancellationToken = default)
    {
        var context = Substitute.For<ServerCallContext>();
        context.CancellationToken.Returns(cancellationToken);
        return context;
    }

    /// <summary>
    /// Polls <paramref name="condition"/> every 25 ms until it returns true or
    /// <paramref name="timeoutMs"/> milliseconds have elapsed, then asserts that it holds.
    /// </summary>
    /// <param name="condition">Predicate to poll; it may be evaluated many times.</param>
    /// <param name="timeoutMs">Maximum time to keep polling, in milliseconds.</param>
    private static async Task WaitForConditionAsync(Func<bool> condition, int timeoutMs = 5000)
    {
        // Stopwatch is monotonic, so a wall-clock adjustment during the test cannot
        // shorten or extend the wait the way a DateTime.UtcNow deadline could.
        var elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (!condition() && elapsed.ElapsedMilliseconds < timeoutMs)
        {
            await Task.Delay(25);
        }

        Assert.True(condition(), $"Condition not met within {timeoutMs}ms");
    }
}
|