fix(notification-outbox): give KPI response a failure shape; log status-query faults

This commit is contained in:
Joseph Doherty
2026-05-19 01:55:46 -04:00
parent 82e3eb0e93
commit 77a05a8960
4 changed files with 68 additions and 11 deletions

View File

@@ -82,9 +82,13 @@ public record NotificationKpiRequest(
/// <summary>
/// Central -> Outbox UI: KPI summary for the notification outbox dashboard.
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
/// carries the cause, and the KPI fields are zeroed/<c>null</c>.
/// </summary>
public record NotificationKpiResponse(
string CorrelationId,
bool Success,
string? ErrorMessage,
int QueueDepth,
int StuckCount,
int ParkedCount,

View File

@@ -298,9 +298,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
{
var sender = Sender;
var now = DateTimeOffset.UtcNow;
var stuckThreshold = _options.StuckAgeThreshold;
QueryOutboxAsync(request, now, stuckThreshold).PipeTo(
QueryOutboxAsync(request, now).PipeTo(
sender,
success: response => response,
failure: ex => new NotificationOutboxQueryResponse(
@@ -312,7 +311,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
}
private async Task<NotificationOutboxQueryResponse> QueryOutboxAsync(
NotificationOutboxQueryRequest request, DateTimeOffset now, TimeSpan stuckThreshold)
NotificationOutboxQueryRequest request, DateTimeOffset now)
{
var filter = new NotificationOutboxFilter(
Status: ParseEnum<NotificationStatus>(request.StatusFilter),
@@ -321,7 +320,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
ListName: request.ListNameFilter,
SubjectKeyword: request.SubjectKeyword,
StuckOnly: request.StuckOnly,
StuckCutoff: request.StuckOnly ? now - stuckThreshold : null,
StuckCutoff: request.StuckOnly ? StuckCutoff(now) : null,
From: request.From,
To: request.To);
@@ -329,7 +328,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
var repository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
var (rows, totalCount) = await repository.QueryAsync(filter, request.PageNumber, request.PageSize);
var stuckCutoff = now - stuckThreshold;
var stuckCutoff = StuckCutoff(now);
var summaries = rows
.Select(row => new NotificationSummary(
row.NotificationId,
@@ -362,9 +361,17 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
StatusQueryAsync(query).PipeTo(
sender,
success: response => response,
failure: _ => new NotificationStatusResponse(
query.CorrelationId, Found: false, Status: string.Empty,
RetryCount: 0, LastError: null, DeliveredAt: null));
failure: ex =>
{
// NotificationStatusResponse has no error field, so a repository fault is
// reported as Found: false — log the fault so a transient DB error is not
// silently indistinguishable from a genuinely-missing notification.
_logger.LogWarning(
ex, "Status query for notification {NotificationId} failed.", query.NotificationId);
return new NotificationStatusResponse(
query.CorrelationId, Found: false, Status: string.Empty,
RetryCount: 0, LastError: null, DeliveredAt: null);
});
}
private async Task<NotificationStatusResponse> StatusQueryAsync(NotificationStatusQuery query)
@@ -482,10 +489,21 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
{
var sender = Sender;
var now = DateTimeOffset.UtcNow;
var stuckCutoff = now - _options.StuckAgeThreshold;
var stuckCutoff = StuckCutoff(now);
var deliveredSince = now - _options.DeliveredKpiWindow;
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(sender);
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(
sender,
success: response => response,
failure: ex => new NotificationKpiResponse(
request.CorrelationId,
Success: false,
ErrorMessage: ex.GetBaseException().Message,
QueueDepth: 0,
StuckCount: 0,
ParkedCount: 0,
DeliveredLastInterval: 0,
OldestPendingAge: null));
}
private async Task<NotificationKpiResponse> ComputeKpisAsync(
@@ -497,6 +515,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
return new NotificationKpiResponse(
correlationId,
Success: true,
ErrorMessage: null,
snapshot.QueueDepth,
snapshot.StuckCount,
snapshot.ParkedCount,
@@ -504,6 +524,12 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
snapshot.OldestPendingAge);
}
/// <summary>
/// Computes the stuck cutoff for a reference time: <paramref name="now"/> moved back by the
/// configured <see cref="NotificationOutboxOptions.StuckAgeThreshold"/>. A still-pending
/// notification that predates the returned instant counts as stuck.
/// </summary>
/// <param name="now">The reference instant the cutoff is measured back from.</param>
/// <returns><paramref name="now"/> minus the configured stuck-age threshold.</returns>
private DateTimeOffset StuckCutoff(DateTimeOffset now)
{
    var threshold = _options.StuckAgeThreshold;
    return now - threshold;
}
/// <summary>
/// A notification counts as stuck when it is still in a non-terminal status
/// (<c>Pending</c> or <c>Retrying</c>) and was created before the supplied cutoff.