From 77a05a89603ef08ce74e6a0f75949d1cefa33e54 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 19 May 2026 01:55:46 -0400 Subject: [PATCH] fix(notification-outbox): give KPI response a failure shape; log status-query faults --- .../Notification/NotificationOutboxQueries.cs | 4 ++ .../NotificationOutboxActor.cs | 46 +++++++++++++++---- .../Messages/NotificationMessagesTests.cs | 4 +- .../NotificationOutboxActorQueryTests.cs | 25 ++++++++++ 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/ScadaLink.Commons/Messages/Notification/NotificationOutboxQueries.cs b/src/ScadaLink.Commons/Messages/Notification/NotificationOutboxQueries.cs index 9bb7c4d..6fbb1b2 100644 --- a/src/ScadaLink.Commons/Messages/Notification/NotificationOutboxQueries.cs +++ b/src/ScadaLink.Commons/Messages/Notification/NotificationOutboxQueries.cs @@ -82,9 +82,13 @@ public record NotificationKpiRequest( /// /// Central -> Outbox UI: KPI summary for the notification outbox dashboard. +/// On a repository fault is false, +/// carries the cause, and the KPI fields are zeroed/null. /// public record NotificationKpiResponse( string CorrelationId, + bool Success, + string? ErrorMessage, int QueueDepth, int StuckCount, int ParkedCount, diff --git a/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs b/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs index f4abede..98cb9f1 100644 --- a/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs +++ b/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs @@ -298,9 +298,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers { var sender = Sender; var now = DateTimeOffset.UtcNow; - var stuckThreshold = _options.StuckAgeThreshold; - QueryOutboxAsync(request, now, stuckThreshold).PipeTo( + QueryOutboxAsync(request, now).PipeTo( sender, success: response => response, failure: ex => new NotificationOutboxQueryResponse( @@ -312,7 +311,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers } private async Task QueryOutboxAsync( - NotificationOutboxQueryRequest request, DateTimeOffset now, TimeSpan stuckThreshold) + NotificationOutboxQueryRequest request, DateTimeOffset now) { var filter = new NotificationOutboxFilter( Status: ParseEnum(request.StatusFilter), @@ -321,7 +320,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers ListName: request.ListNameFilter, SubjectKeyword: request.SubjectKeyword, StuckOnly: request.StuckOnly, - StuckCutoff: request.StuckOnly ? now - stuckThreshold : null, + StuckCutoff: request.StuckOnly ? StuckCutoff(now) : null, From: request.From, To: request.To); @@ -329,7 +328,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers var repository = scope.ServiceProvider.GetRequiredService(); var (rows, totalCount) = await repository.QueryAsync(filter, request.PageNumber, request.PageSize); - var stuckCutoff = now - stuckThreshold; + var stuckCutoff = StuckCutoff(now); var summaries = rows .Select(row => new NotificationSummary( row.NotificationId, @@ -362,9 +361,17 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers StatusQueryAsync(query).PipeTo( sender, success: response => response, - failure: _ => new NotificationStatusResponse( - query.CorrelationId, Found: false, Status: string.Empty, - RetryCount: 0, LastError: null, DeliveredAt: null)); + failure: ex => + { + // NotificationStatusResponse has no error field, so a repository fault is + // reported as Found: false — log the fault so a transient DB error is not + // silently indistinguishable from a genuinely-missing notification. + _logger.LogWarning( + ex, "Status query for notification {NotificationId} failed.", query.NotificationId); + return new NotificationStatusResponse( + query.CorrelationId, Found: false, Status: string.Empty, + RetryCount: 0, LastError: null, DeliveredAt: null); + }); } private async Task StatusQueryAsync(NotificationStatusQuery query) @@ -482,10 +489,21 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers { var sender = Sender; var now = DateTimeOffset.UtcNow; - var stuckCutoff = now - _options.StuckAgeThreshold; + var stuckCutoff = StuckCutoff(now); var deliveredSince = now - _options.DeliveredKpiWindow; - ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(sender); + ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo( + sender, + success: response => response, + failure: ex => new NotificationKpiResponse( + request.CorrelationId, + Success: false, + ErrorMessage: ex.GetBaseException().Message, + QueueDepth: 0, + StuckCount: 0, + ParkedCount: 0, + DeliveredLastInterval: 0, + OldestPendingAge: null)); } private async Task ComputeKpisAsync( @@ -497,6 +515,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers return new NotificationKpiResponse( correlationId, + Success: true, + ErrorMessage: null, snapshot.QueueDepth, snapshot.StuckCount, snapshot.ParkedCount, @@ -504,6 +524,12 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers snapshot.OldestPendingAge); } + /// + /// The instant before which a still-pending notification counts as stuck — + /// offset back by . + /// + private DateTimeOffset StuckCutoff(DateTimeOffset now) => now - _options.StuckAgeThreshold; + /// /// A notification counts as stuck when it is still in a non-terminal status /// (Pending or Retrying) and was created before the supplied cutoff. diff --git a/tests/ScadaLink.Commons.Tests/Messages/NotificationMessagesTests.cs b/tests/ScadaLink.Commons.Tests/Messages/NotificationMessagesTests.cs index 0fba09e..2b171c8 100644 --- a/tests/ScadaLink.Commons.Tests/Messages/NotificationMessagesTests.cs +++ b/tests/ScadaLink.Commons.Tests/Messages/NotificationMessagesTests.cs @@ -169,9 +169,11 @@ public class NotificationMessagesTests public void NotificationKpiResponse_WithExpression_ChangesSingleField() { var kpi = new NotificationKpiResponse( - "corr-1", 10, 2, 1, 5, TimeSpan.FromMinutes(3)); + "corr-1", Success: true, ErrorMessage: null, 10, 2, 1, 5, TimeSpan.FromMinutes(3)); var updated = kpi with { QueueDepth = 12 }; + Assert.True(kpi.Success); + Assert.Null(kpi.ErrorMessage); Assert.Equal(10, kpi.QueueDepth); Assert.Equal(12, updated.QueueDepth); Assert.Equal(2, updated.StuckCount); diff --git a/tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorQueryTests.cs b/tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorQueryTests.cs index 77616fe..f5aa410 100644 --- a/tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorQueryTests.cs +++ b/tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorQueryTests.cs @@ -323,6 +323,8 @@ public class NotificationOutboxActorQueryTests : TestKit var response = ExpectMsg(); Assert.Equal("corr-11", response.CorrelationId); + Assert.True(response.Success); + Assert.Null(response.ErrorMessage); Assert.Equal(7, response.QueueDepth); Assert.Equal(2, response.StuckCount); Assert.Equal(3, response.ParkedCount); @@ -332,4 +334,27 @@ public class NotificationOutboxActorQueryTests : TestKit _repository.Received(1).ComputeKpisAsync( Arg.Any(), Arg.Any(), Arg.Any()); } + + [Fact] + public void KpiRequest_RepositoryThrows_RepliesFailureResponse() + { + _repository.ComputeKpisAsync( + Arg.Any(), Arg.Any(), Arg.Any()) + .ThrowsAsync(new InvalidOperationException("kpi db down")); + var actor = CreateActor(); + + actor.Tell(new NotificationKpiRequest("corr-12"), TestActor); + + // A repository fault yields a failure NotificationKpiResponse, not a Status.Failure. + var response = ExpectMsg(); + Assert.Equal("corr-12", response.CorrelationId); + Assert.False(response.Success); + Assert.NotNull(response.ErrorMessage); + Assert.Contains("kpi db down", response.ErrorMessage); + Assert.Equal(0, response.QueueDepth); + Assert.Equal(0, response.StuckCount); + Assert.Equal(0, response.ParkedCount); + Assert.Equal(0, response.DeliveredLastInterval); + Assert.Null(response.OldestPendingAge); + } }