feat(health): core review fixes (async writer, gRPC cancellation, validation, configurable retry-after)

This commit is contained in:
Joseph Doherty
2026-06-01 07:00:21 -04:00
parent cf277eb7df
commit aa2251b93d
7 changed files with 92 additions and 14 deletions
@@ -14,7 +14,19 @@ namespace ZB.MOM.WW.Health;
public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter
{
/// <summary>Default <c>Retry-After</c> value (seconds) advertised on a standby 503 response.</summary>
private const int RetryAfterSeconds = 5;
private const int DefaultRetryAfterSeconds = 5;
private readonly int _retryAfterSeconds;
/// <summary>
/// Initializes a new <see cref="ActiveNodeGateEndpointFilter"/> that advertises the default
/// <c>Retry-After</c> delay (<c>DefaultRetryAfterSeconds</c>, 5 seconds).
/// </summary>
public ActiveNodeGateEndpointFilter() : this(DefaultRetryAfterSeconds) { }
/// <summary>
/// Initializes a new <see cref="ActiveNodeGateEndpointFilter"/> that advertises the given
/// <c>Retry-After</c> delay on a standby 503 response.
/// </summary>
/// <param name="retryAfterSeconds">
/// The <c>Retry-After</c> value (seconds) advertised on a standby 503 response. Must not be negative.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="retryAfterSeconds"/> is negative.
/// </exception>
public ActiveNodeGateEndpointFilter(int retryAfterSeconds)
{
    // The value is written verbatim into the Retry-After header, which is only defined for a
    // non-negative number of seconds, so reject a negative delay at construction time.
    if (retryAfterSeconds < 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(retryAfterSeconds), retryAfterSeconds, "Retry-After must not be negative.");
    }

    _retryAfterSeconds = retryAfterSeconds;
}
/// <summary>
/// Returns 503 (with <c>Retry-After</c>) when the resolved <see cref="IActiveNodeGate"/> reports
@@ -34,7 +46,8 @@ public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter
if (gate is { IsActiveNode: false })
{
httpContext.Response.Headers.RetryAfter = RetryAfterSeconds.ToString();
httpContext.Response.Headers.RetryAfter =
_retryAfterSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture);
return Results.StatusCode(StatusCodes.Status503ServiceUnavailable);
}
@@ -53,10 +66,15 @@ public static class ActiveNodeGateExtensions
/// returns 503 with a <c>Retry-After</c> header when the node is a standby.
/// </summary>
/// <param name="builder">The endpoint convention builder to decorate.</param>
/// <param name="retryAfterSeconds">
/// The <c>Retry-After</c> value (seconds) advertised on a standby 503 response. Defaults to 5.
/// </param>
/// <returns>The same <paramref name="builder"/> for chaining.</returns>
public static IEndpointConventionBuilder RequireActiveNode(this IEndpointConventionBuilder builder)
public static IEndpointConventionBuilder RequireActiveNode(
this IEndpointConventionBuilder builder,
int retryAfterSeconds = 5)
{
ArgumentNullException.ThrowIfNull(builder);
return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter());
return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter(retryAfterSeconds));
}
}
@@ -58,11 +58,19 @@ public sealed class GrpcDependencyHealthCheck : IHealthCheck
? HealthCheckResult.Healthy($"{name} is reachable.")
: HealthCheckResult.Unhealthy($"{name} is unreachable.");
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (RpcException ex) when (ex.StatusCode == StatusCode.Cancelled && cancellationToken.IsCancellationRequested)
{
throw new OperationCanceledException(cancellationToken);
}
catch (RpcException ex)
{
return HealthCheckResult.Unhealthy($"{name} probe failed: {ex.Status.StatusCode}.", ex);
}
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested)
{
return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex);
}
@@ -21,6 +21,21 @@ public sealed class GrpcDependencyOptions
/// </summary>
public string? DependencyName { get; set; }
private TimeSpan _timeout = TimeSpan.FromSeconds(5);
/// <summary>Maximum time the probe may take before it is treated as unreachable. Default 5 s.</summary>
public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5);
/// <exception cref="ArgumentOutOfRangeException">Thrown when the assigned value is <see cref="TimeSpan.Zero"/> or negative.</exception>
public TimeSpan Timeout
{
    get => _timeout;

    // Only strictly positive durations are stored; anything else is rejected before the
    // backing field is touched.
    set => _timeout = value > TimeSpan.Zero
        ? value
        : throw new ArgumentOutOfRangeException(nameof(value), value, "Timeout must be greater than zero.");
}
}
@@ -1,4 +1,5 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Diagnostics.HealthChecks;
@@ -28,6 +29,7 @@ public static class ZbHealthWriter
private static readonly JsonSerializerOptions SerializerOptions = new()
{
// Property names are emitted in camelCase.
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
// Properties whose value is null are left out of the output instead of being written as null.
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
/// <summary>
@@ -35,7 +37,7 @@ public static class ZbHealthWriter
/// </summary>
/// <param name="context">The current HTTP context. Its <see cref="HttpResponse"/> is written to.</param>
/// <param name="report">The aggregated health report for the tier that ran.</param>
public static Task WriteJsonAsync(HttpContext context, HealthReport report)
public static async Task WriteJsonAsync(HttpContext context, HealthReport report)
{
ArgumentNullException.ThrowIfNull(context);
ArgumentNullException.ThrowIfNull(report);
@@ -56,7 +58,7 @@ public static class ZbHealthWriter
}),
};
return context.Response.WriteAsync(JsonSerializer.Serialize(payload, SerializerOptions));
await JsonSerializer.SerializeAsync(context.Response.Body, payload, SerializerOptions, context.RequestAborted).ConfigureAwait(false);
}
private sealed class HealthReportDto
@@ -14,14 +14,15 @@ public sealed class GrpcDependencyHealthCheckTests
{
private static readonly GrpcChannel Channel = GrpcChannel.ForAddress("http://localhost");
private static async Task<HealthCheckResult> RunAsync(GrpcDependencyOptions options)
private static async Task<HealthCheckResult> RunAsync(
GrpcDependencyOptions options, CancellationToken cancellationToken = default)
{
var check = new GrpcDependencyHealthCheck(Channel, options);
var context = new HealthCheckContext
{
Registration = new HealthCheckRegistration("grpc-dep", check, HealthStatus.Unhealthy, tags: null),
};
return await check.CheckHealthAsync(context, CancellationToken.None);
return await check.CheckHealthAsync(context, cancellationToken);
}
[Fact]
@@ -69,4 +70,38 @@ public sealed class GrpcDependencyHealthCheckTests
Assert.Equal(HealthStatus.Unhealthy, result.Status);
Assert.Contains("mxaccessgw worker", result.Description);
}
/// <summary>
/// A probe that never completes on its own must be reported as
/// <see cref="HealthStatus.Unhealthy"/> with a description that names the timeout.
/// </summary>
[Fact]
public async Task ProbeExceedsTimeout_Unhealthy()
{
    var options = new GrpcDependencyOptions
    {
        Timeout = TimeSpan.FromMilliseconds(50),
        Probe = static async (_, ct) =>
        {
            // Finishes only when the token handed to the probe is cancelled.
            await Task.Delay(Timeout.Infinite, ct);
            return true;
        },
    };

    var result = await RunAsync(options);

    Assert.Equal(HealthStatus.Unhealthy, result.Status);

    // Unhealthy alone is also produced by the unreachable and RpcException branches;
    // pin the timeout branch through its description.
    Assert.Contains("timed out", result.Description);
}
/// <summary>
/// When the caller's token is already cancelled, the check must surface an
/// <see cref="OperationCanceledException"/> (or a derived type) instead of returning a result.
/// </summary>
[Fact]
public async Task ExternalCancellation_Throws()
{
    using var callerCts = new CancellationTokenSource();
    await callerCts.CancelAsync();

    var options = new GrpcDependencyOptions
    {
        Probe = static async (_, ct) =>
        {
            await Task.Delay(Timeout.Infinite, ct);
            return true;
        },
    };

    await Assert.ThrowsAnyAsync<OperationCanceledException>(
        () => RunAsync(options, callerCts.Token));
}
}
@@ -73,7 +73,7 @@ public static class ZbHealthEndpointExtensions
Action<ZbHealthEndpointOptions> configure);
}
/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, duration } } }.
/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, durationMs } } }.
public static class ZbHealthWriter
{
public static Task WriteJsonAsync(HttpContext context, HealthReport report);
+3 -3
View File
@@ -121,12 +121,12 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr
"database": {
"status": "Healthy",
"description": "SQL Server reachable",
"duration": "00:00:00.0120000"
"durationMs": 12
},
"akka-cluster": {
"status": "Healthy",
"description": "Member status: Up",
"duration": "00:00:00.0001000"
"durationMs": 0.1
}
}
}
@@ -141,7 +141,7 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr
| `entries` | object | Keyed by check registration name |
| `entries.<name>.status` | string | Per-check status |
| `entries.<name>.description` | string? | Human-readable detail (omitted when null) |
| `entries.<name>.duration` | string | TimeSpan `ToString()` — per-check elapsed time |
| `entries.<name>.durationMs` | number | Per-check elapsed time, milliseconds |
The writer is exposed as a static `Task WriteJsonAsync(HttpContext, HealthReport)` so consumers can
plug it into `MapHealthChecks` options and also call it from custom endpoints.