fix(notification-outbox): give KPI response a failure shape; log status-query faults
This commit is contained in:
@@ -82,9 +82,13 @@ public record NotificationKpiRequest(
|
|||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Central -> Outbox UI: KPI summary for the notification outbox dashboard.
|
/// Central -> Outbox UI: KPI summary for the notification outbox dashboard.
|
||||||
|
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
|
||||||
|
/// carries the cause, and the KPI fields are zeroed/<c>null</c>.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public record NotificationKpiResponse(
|
public record NotificationKpiResponse(
|
||||||
string CorrelationId,
|
string CorrelationId,
|
||||||
|
bool Success,
|
||||||
|
string? ErrorMessage,
|
||||||
int QueueDepth,
|
int QueueDepth,
|
||||||
int StuckCount,
|
int StuckCount,
|
||||||
int ParkedCount,
|
int ParkedCount,
|
||||||
|
|||||||
@@ -298,9 +298,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
{
|
{
|
||||||
var sender = Sender;
|
var sender = Sender;
|
||||||
var now = DateTimeOffset.UtcNow;
|
var now = DateTimeOffset.UtcNow;
|
||||||
var stuckThreshold = _options.StuckAgeThreshold;
|
|
||||||
|
|
||||||
QueryOutboxAsync(request, now, stuckThreshold).PipeTo(
|
QueryOutboxAsync(request, now).PipeTo(
|
||||||
sender,
|
sender,
|
||||||
success: response => response,
|
success: response => response,
|
||||||
failure: ex => new NotificationOutboxQueryResponse(
|
failure: ex => new NotificationOutboxQueryResponse(
|
||||||
@@ -312,7 +311,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
}
|
}
|
||||||
|
|
||||||
private async Task<NotificationOutboxQueryResponse> QueryOutboxAsync(
|
private async Task<NotificationOutboxQueryResponse> QueryOutboxAsync(
|
||||||
NotificationOutboxQueryRequest request, DateTimeOffset now, TimeSpan stuckThreshold)
|
NotificationOutboxQueryRequest request, DateTimeOffset now)
|
||||||
{
|
{
|
||||||
var filter = new NotificationOutboxFilter(
|
var filter = new NotificationOutboxFilter(
|
||||||
Status: ParseEnum<NotificationStatus>(request.StatusFilter),
|
Status: ParseEnum<NotificationStatus>(request.StatusFilter),
|
||||||
@@ -321,7 +320,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
ListName: request.ListNameFilter,
|
ListName: request.ListNameFilter,
|
||||||
SubjectKeyword: request.SubjectKeyword,
|
SubjectKeyword: request.SubjectKeyword,
|
||||||
StuckOnly: request.StuckOnly,
|
StuckOnly: request.StuckOnly,
|
||||||
StuckCutoff: request.StuckOnly ? now - stuckThreshold : null,
|
StuckCutoff: request.StuckOnly ? StuckCutoff(now) : null,
|
||||||
From: request.From,
|
From: request.From,
|
||||||
To: request.To);
|
To: request.To);
|
||||||
|
|
||||||
@@ -329,7 +328,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
var repository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
|
var repository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
|
||||||
var (rows, totalCount) = await repository.QueryAsync(filter, request.PageNumber, request.PageSize);
|
var (rows, totalCount) = await repository.QueryAsync(filter, request.PageNumber, request.PageSize);
|
||||||
|
|
||||||
var stuckCutoff = now - stuckThreshold;
|
var stuckCutoff = StuckCutoff(now);
|
||||||
var summaries = rows
|
var summaries = rows
|
||||||
.Select(row => new NotificationSummary(
|
.Select(row => new NotificationSummary(
|
||||||
row.NotificationId,
|
row.NotificationId,
|
||||||
@@ -362,9 +361,17 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
StatusQueryAsync(query).PipeTo(
|
StatusQueryAsync(query).PipeTo(
|
||||||
sender,
|
sender,
|
||||||
success: response => response,
|
success: response => response,
|
||||||
failure: _ => new NotificationStatusResponse(
|
failure: ex =>
|
||||||
query.CorrelationId, Found: false, Status: string.Empty,
|
{
|
||||||
RetryCount: 0, LastError: null, DeliveredAt: null));
|
// NotificationStatusResponse has no error field, so a repository fault is
|
||||||
|
// reported as Found: false — log the fault so a transient DB error is not
|
||||||
|
// silently indistinguishable from a genuinely-missing notification.
|
||||||
|
_logger.LogWarning(
|
||||||
|
ex, "Status query for notification {NotificationId} failed.", query.NotificationId);
|
||||||
|
return new NotificationStatusResponse(
|
||||||
|
query.CorrelationId, Found: false, Status: string.Empty,
|
||||||
|
RetryCount: 0, LastError: null, DeliveredAt: null);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task<NotificationStatusResponse> StatusQueryAsync(NotificationStatusQuery query)
|
private async Task<NotificationStatusResponse> StatusQueryAsync(NotificationStatusQuery query)
|
||||||
@@ -482,10 +489,21 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
{
|
{
|
||||||
var sender = Sender;
|
var sender = Sender;
|
||||||
var now = DateTimeOffset.UtcNow;
|
var now = DateTimeOffset.UtcNow;
|
||||||
var stuckCutoff = now - _options.StuckAgeThreshold;
|
var stuckCutoff = StuckCutoff(now);
|
||||||
var deliveredSince = now - _options.DeliveredKpiWindow;
|
var deliveredSince = now - _options.DeliveredKpiWindow;
|
||||||
|
|
||||||
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(sender);
|
ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince).PipeTo(
|
||||||
|
sender,
|
||||||
|
success: response => response,
|
||||||
|
failure: ex => new NotificationKpiResponse(
|
||||||
|
request.CorrelationId,
|
||||||
|
Success: false,
|
||||||
|
ErrorMessage: ex.GetBaseException().Message,
|
||||||
|
QueueDepth: 0,
|
||||||
|
StuckCount: 0,
|
||||||
|
ParkedCount: 0,
|
||||||
|
DeliveredLastInterval: 0,
|
||||||
|
OldestPendingAge: null));
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task<NotificationKpiResponse> ComputeKpisAsync(
|
private async Task<NotificationKpiResponse> ComputeKpisAsync(
|
||||||
@@ -497,6 +515,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
|
|
||||||
return new NotificationKpiResponse(
|
return new NotificationKpiResponse(
|
||||||
correlationId,
|
correlationId,
|
||||||
|
Success: true,
|
||||||
|
ErrorMessage: null,
|
||||||
snapshot.QueueDepth,
|
snapshot.QueueDepth,
|
||||||
snapshot.StuckCount,
|
snapshot.StuckCount,
|
||||||
snapshot.ParkedCount,
|
snapshot.ParkedCount,
|
||||||
@@ -504,6 +524,12 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
|||||||
snapshot.OldestPendingAge);
|
snapshot.OldestPendingAge);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The instant before which a still-pending notification counts as stuck — <paramref name="now"/>
|
||||||
|
/// offset back by <see cref="NotificationOutboxOptions.StuckAgeThreshold"/>.
|
||||||
|
/// </summary>
|
||||||
|
private DateTimeOffset StuckCutoff(DateTimeOffset now) => now - _options.StuckAgeThreshold;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A notification counts as stuck when it is still in a non-terminal status
|
/// A notification counts as stuck when it is still in a non-terminal status
|
||||||
/// (<c>Pending</c> or <c>Retrying</c>) and was created before the supplied cutoff.
|
/// (<c>Pending</c> or <c>Retrying</c>) and was created before the supplied cutoff.
|
||||||
|
|||||||
@@ -169,9 +169,11 @@ public class NotificationMessagesTests
|
|||||||
public void NotificationKpiResponse_WithExpression_ChangesSingleField()
|
public void NotificationKpiResponse_WithExpression_ChangesSingleField()
|
||||||
{
|
{
|
||||||
var kpi = new NotificationKpiResponse(
|
var kpi = new NotificationKpiResponse(
|
||||||
"corr-1", 10, 2, 1, 5, TimeSpan.FromMinutes(3));
|
"corr-1", Success: true, ErrorMessage: null, 10, 2, 1, 5, TimeSpan.FromMinutes(3));
|
||||||
var updated = kpi with { QueueDepth = 12 };
|
var updated = kpi with { QueueDepth = 12 };
|
||||||
|
|
||||||
|
Assert.True(kpi.Success);
|
||||||
|
Assert.Null(kpi.ErrorMessage);
|
||||||
Assert.Equal(10, kpi.QueueDepth);
|
Assert.Equal(10, kpi.QueueDepth);
|
||||||
Assert.Equal(12, updated.QueueDepth);
|
Assert.Equal(12, updated.QueueDepth);
|
||||||
Assert.Equal(2, updated.StuckCount);
|
Assert.Equal(2, updated.StuckCount);
|
||||||
|
|||||||
@@ -323,6 +323,8 @@ public class NotificationOutboxActorQueryTests : TestKit
|
|||||||
|
|
||||||
var response = ExpectMsg<NotificationKpiResponse>();
|
var response = ExpectMsg<NotificationKpiResponse>();
|
||||||
Assert.Equal("corr-11", response.CorrelationId);
|
Assert.Equal("corr-11", response.CorrelationId);
|
||||||
|
Assert.True(response.Success);
|
||||||
|
Assert.Null(response.ErrorMessage);
|
||||||
Assert.Equal(7, response.QueueDepth);
|
Assert.Equal(7, response.QueueDepth);
|
||||||
Assert.Equal(2, response.StuckCount);
|
Assert.Equal(2, response.StuckCount);
|
||||||
Assert.Equal(3, response.ParkedCount);
|
Assert.Equal(3, response.ParkedCount);
|
||||||
@@ -332,4 +334,27 @@ public class NotificationOutboxActorQueryTests : TestKit
|
|||||||
_repository.Received(1).ComputeKpisAsync(
|
_repository.Received(1).ComputeKpisAsync(
|
||||||
Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>());
|
Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void KpiRequest_RepositoryThrows_RepliesFailureResponse()
|
||||||
|
{
|
||||||
|
_repository.ComputeKpisAsync(
|
||||||
|
Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
|
||||||
|
.ThrowsAsync(new InvalidOperationException("kpi db down"));
|
||||||
|
var actor = CreateActor();
|
||||||
|
|
||||||
|
actor.Tell(new NotificationKpiRequest("corr-12"), TestActor);
|
||||||
|
|
||||||
|
// A repository fault yields a failure NotificationKpiResponse, not a Status.Failure.
|
||||||
|
var response = ExpectMsg<NotificationKpiResponse>();
|
||||||
|
Assert.Equal("corr-12", response.CorrelationId);
|
||||||
|
Assert.False(response.Success);
|
||||||
|
Assert.NotNull(response.ErrorMessage);
|
||||||
|
Assert.Contains("kpi db down", response.ErrorMessage);
|
||||||
|
Assert.Equal(0, response.QueueDepth);
|
||||||
|
Assert.Equal(0, response.StuckCount);
|
||||||
|
Assert.Equal(0, response.ParkedCount);
|
||||||
|
Assert.Equal(0, response.DeliveredLastInterval);
|
||||||
|
Assert.Null(response.OldestPendingAge);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user