fix(notification-outbox): give KPI response a failure shape; log status-query faults
This commit is contained in:
@@ -82,9 +82,13 @@ public record NotificationKpiRequest(
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: KPI summary for the notification outbox dashboard.
|
||||
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
|
||||
/// carries the cause, and the KPI fields are zeroed/<c>null</c>.
|
||||
/// </summary>
|
||||
public record NotificationKpiResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
int QueueDepth,
|
||||
int StuckCount,
|
||||
int ParkedCount,
|
||||
|
||||
@@ -298,9 +298,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
var sender = Sender;
|
||||
var now = DateTimeOffset.UtcNow;
|
||||
var stuckThreshold = _options.StuckAgeThreshold;
|
||||
|
||||
QueryOutboxAsync(request, now, stuckThreshold).PipeTo(
|
||||
QueryOutboxAsync(request, now).PipeTo(
|
||||
sender,
|
||||
success: response => response,
|
||||
failure: ex => new NotificationOutboxQueryResponse(
|
||||
@@ -312,7 +311,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
|
||||
private async Task<NotificationOutboxQueryResponse> QueryOutboxAsync(
|
||||
NotificationOutboxQueryRequest request, DateTimeOffset now, TimeSpan stuckThreshold)
|
||||
NotificationOutboxQueryRequest request, DateTimeOffset now)
|
||||
{
|
||||
var filter = new NotificationOutboxFilter(
|
||||
Status: ParseEnum<NotificationStatus>(request.StatusFilter),
|
||||
@@ -321,7 +320,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
ListName: request.ListNameFilter,
|
||||
SubjectKeyword: request.SubjectKeyword,
|
||||
StuckOnly: request.StuckOnly,
|
||||
StuckCutoff: request.StuckOnly ? now - stuckThreshold : null,
|
||||
StuckCutoff: request.StuckOnly ? StuckCutoff(now) : null,
|
||||
From: request.From,
|
||||
To: request.To);
|
||||
|
||||
@@ -329,7 +328,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
var repository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
|
||||
var (rows, totalCount) = await repository.QueryAsync(filter, request.PageNumber, request.PageSize);
|
||||
|
||||
var stuckCutoff = now - stuckThreshold;
|
||||
var stuckCutoff = StuckCutoff(now);
|
||||
var summaries = rows
|
||||
.Select(row => new NotificationSummary(
|
||||
row.NotificationId,
|
||||
@@ -362,9 +361,17 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
StatusQueryAsync(query).PipeTo(
|
||||
sender,
|
||||
success: response => response,
|
||||
failure: _ => new NotificationStatusResponse(
|
||||
failure: ex =>
|
||||
{
|
||||
// NotificationStatusResponse has no error field, so a repository fault is
|
||||
// reported as Found: false — log the fault so a transient DB error is not
|
||||
// silently indistinguishable from a genuinely-missing notification.
|
||||
_logger.LogWarning(
|
||||
ex, "Status query for notification {NotificationId} failed.", query.NotificationId);
|
||||
return new NotificationStatusResponse(
|
||||
query.CorrelationId, Found: false, Status: string.Empty,
|
||||
RetryCount: 0, LastError: null, DeliveredAt: null));
|
||||
RetryCount: 0, LastError: null, DeliveredAt: null);
|
||||
});
|
||||
}
|
||||
|
||||
private async Task<NotificationStatusResponse> StatusQueryAsync(NotificationStatusQuery query)
|
||||
@@ -482,10 +489,21 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
var sender = Sender;
|
||||
var now = DateTimeOffset.UtcNow;
|
||||
var stuckCutoff = now - _options.StuckAgeThreshold;
|
||||
var stuckCutoff = StuckCutoff(now);
|
||||
var deliveredSince = now - _options.DeliveredKpiWindow;
|
||||
|
||||
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(sender);
|
||||
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(
|
||||
sender,
|
||||
success: response => response,
|
||||
failure: ex => new NotificationKpiResponse(
|
||||
request.CorrelationId,
|
||||
Success: false,
|
||||
ErrorMessage: ex.GetBaseException().Message,
|
||||
QueueDepth: 0,
|
||||
StuckCount: 0,
|
||||
ParkedCount: 0,
|
||||
DeliveredLastInterval: 0,
|
||||
OldestPendingAge: null));
|
||||
}
|
||||
|
||||
private async Task<NotificationKpiResponse> ComputeKpisAsync(
|
||||
@@ -497,6 +515,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
|
||||
return new NotificationKpiResponse(
|
||||
correlationId,
|
||||
Success: true,
|
||||
ErrorMessage: null,
|
||||
snapshot.QueueDepth,
|
||||
snapshot.StuckCount,
|
||||
snapshot.ParkedCount,
|
||||
@@ -504,6 +524,12 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
snapshot.OldestPendingAge);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The instant before which a still-pending notification counts as stuck — <paramref name="now"/>
|
||||
/// offset back by <see cref="NotificationOutboxOptions.StuckAgeThreshold"/>.
|
||||
/// </summary>
|
||||
private DateTimeOffset StuckCutoff(DateTimeOffset now) => now - _options.StuckAgeThreshold;
|
||||
|
||||
/// <summary>
|
||||
/// A notification counts as stuck when it is still in a non-terminal status
|
||||
/// (<c>Pending</c> or <c>Retrying</c>) and was created before the supplied cutoff.
|
||||
|
||||
@@ -169,9 +169,11 @@ public class NotificationMessagesTests
|
||||
public void NotificationKpiResponse_WithExpression_ChangesSingleField()
|
||||
{
|
||||
var kpi = new NotificationKpiResponse(
|
||||
"corr-1", 10, 2, 1, 5, TimeSpan.FromMinutes(3));
|
||||
"corr-1", Success: true, ErrorMessage: null, 10, 2, 1, 5, TimeSpan.FromMinutes(3));
|
||||
var updated = kpi with { QueueDepth = 12 };
|
||||
|
||||
Assert.True(kpi.Success);
|
||||
Assert.Null(kpi.ErrorMessage);
|
||||
Assert.Equal(10, kpi.QueueDepth);
|
||||
Assert.Equal(12, updated.QueueDepth);
|
||||
Assert.Equal(2, updated.StuckCount);
|
||||
|
||||
@@ -323,6 +323,8 @@ public class NotificationOutboxActorQueryTests : TestKit
|
||||
|
||||
var response = ExpectMsg<NotificationKpiResponse>();
|
||||
Assert.Equal("corr-11", response.CorrelationId);
|
||||
Assert.True(response.Success);
|
||||
Assert.Null(response.ErrorMessage);
|
||||
Assert.Equal(7, response.QueueDepth);
|
||||
Assert.Equal(2, response.StuckCount);
|
||||
Assert.Equal(3, response.ParkedCount);
|
||||
@@ -332,4 +334,27 @@ public class NotificationOutboxActorQueryTests : TestKit
|
||||
_repository.Received(1).ComputeKpisAsync(
|
||||
Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void KpiRequest_RepositoryThrows_RepliesFailureResponse()
|
||||
{
|
||||
_repository.ComputeKpisAsync(
|
||||
Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
|
||||
.ThrowsAsync(new InvalidOperationException("kpi db down"));
|
||||
var actor = CreateActor();
|
||||
|
||||
actor.Tell(new NotificationKpiRequest("corr-12"), TestActor);
|
||||
|
||||
// A repository fault yields a failure NotificationKpiResponse, not a Status.Failure.
|
||||
var response = ExpectMsg<NotificationKpiResponse>();
|
||||
Assert.Equal("corr-12", response.CorrelationId);
|
||||
Assert.False(response.Success);
|
||||
Assert.NotNull(response.ErrorMessage);
|
||||
Assert.Contains("kpi db down", response.ErrorMessage);
|
||||
Assert.Equal(0, response.QueueDepth);
|
||||
Assert.Equal(0, response.StuckCount);
|
||||
Assert.Equal(0, response.ParkedCount);
|
||||
Assert.Equal(0, response.DeliveredLastInterval);
|
||||
Assert.Null(response.OldestPendingAge);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user