From aa2251b93d380609162984ce28e3ff1c946ceedc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 1 Jun 2026 07:00:21 -0400 Subject: [PATCH] feat(health): core review fixes (async writer, gRPC cancellation, validation, configurable retry-after) --- .../ActiveNodeGateEndpointFilter.cs | 26 +++++++++++-- .../GrpcDependencyHealthCheck.cs | 10 ++++- .../ZB.MOM.WW.Health/GrpcDependencyOptions.cs | 17 +++++++- ...lthResponseWriter.cs => ZbHealthWriter.cs} | 6 ++- .../GrpcDependencyHealthCheckTests.cs | 39 ++++++++++++++++++- .../shared-contract/ZB.MOM.WW.Health.md | 2 +- components/health/spec/SPEC.md | 6 +-- 7 files changed, 92 insertions(+), 14 deletions(-) rename ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/{ZbHealthResponseWriter.cs => ZbHealthWriter.cs} (88%) diff --git a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ActiveNodeGateEndpointFilter.cs b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ActiveNodeGateEndpointFilter.cs index 0590798..770bdd3 100644 --- a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ActiveNodeGateEndpointFilter.cs +++ b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ActiveNodeGateEndpointFilter.cs @@ -14,7 +14,19 @@ namespace ZB.MOM.WW.Health; public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter { /// Default Retry-After value (seconds) advertised on a standby 503 response. - private const int RetryAfterSeconds = 5; + private const int DefaultRetryAfterSeconds = 5; + + private readonly int _retryAfterSeconds; + + /// Initializes a new using the default 5 s retry-after. + public ActiveNodeGateEndpointFilter() + : this(DefaultRetryAfterSeconds) + { + } + + /// Initializes a new . + /// The Retry-After value (seconds) advertised on a standby 503 response. + public ActiveNodeGateEndpointFilter(int retryAfterSeconds) => _retryAfterSeconds = retryAfterSeconds; /// /// Returns 503 (with Retry-After) when the resolved reports @@ -34,7 +46,8 @@ public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter if (gate is { IsActiveNode: false }) { - httpContext.Response.Headers.RetryAfter = RetryAfterSeconds.ToString(); + httpContext.Response.Headers.RetryAfter = + _retryAfterSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture); return Results.StatusCode(StatusCodes.Status503ServiceUnavailable); } @@ -53,10 +66,15 @@ public static class ActiveNodeGateExtensions /// returns 503 with a Retry-After header when the node is a standby. /// /// The endpoint convention builder to decorate. + /// + /// The Retry-After value (seconds) advertised on a standby 503 response. Defaults to 5. + /// /// The same for chaining. - public static IEndpointConventionBuilder RequireActiveNode(this IEndpointConventionBuilder builder) + public static IEndpointConventionBuilder RequireActiveNode( + this IEndpointConventionBuilder builder, + int retryAfterSeconds = 5) { ArgumentNullException.ThrowIfNull(builder); - return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter()); + return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter(retryAfterSeconds)); } } diff --git a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyHealthCheck.cs b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyHealthCheck.cs index 0c577d8..e1aa4c2 100644 --- a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyHealthCheck.cs +++ b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyHealthCheck.cs @@ -58,11 +58,19 @@ public sealed class GrpcDependencyHealthCheck : IHealthCheck ? HealthCheckResult.Healthy($"{name} is reachable.") : HealthCheckResult.Unhealthy($"{name} is unreachable."); } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } + catch (RpcException ex) when (ex.StatusCode == StatusCode.Cancelled && cancellationToken.IsCancellationRequested) + { + throw new OperationCanceledException(cancellationToken); + } catch (RpcException ex) { return HealthCheckResult.Unhealthy($"{name} probe failed: {ex.Status.StatusCode}.", ex); } - catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested) + catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested) { return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex); } diff --git a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyOptions.cs b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyOptions.cs index bd42812..6bd0a0e 100644 --- a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyOptions.cs +++ b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/GrpcDependencyOptions.cs @@ -21,6 +21,21 @@ public sealed class GrpcDependencyOptions /// public string? DependencyName { get; set; } + private TimeSpan _timeout = TimeSpan.FromSeconds(5); + /// Maximum time the probe may take before it is treated as unreachable. Default 5 s. - public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5); + /// Thrown when set to a value <= . + public TimeSpan Timeout + { + get => _timeout; + set + { + if (value <= TimeSpan.Zero) + { + throw new ArgumentOutOfRangeException(nameof(value), value, "Timeout must be greater than zero."); + } + + _timeout = value; + } + } } diff --git a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthResponseWriter.cs b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthWriter.cs similarity index 88% rename from ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthResponseWriter.cs rename to ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthWriter.cs index 8cbfe26..401626c 100644 --- a/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthResponseWriter.cs +++ b/ZB.MOM.WW.Health/src/ZB.MOM.WW.Health/ZbHealthWriter.cs @@ -1,4 +1,5 @@ using System.Text.Json; +using System.Text.Json.Serialization; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.Diagnostics.HealthChecks; @@ -28,6 +29,7 @@ public static class ZbHealthWriter private static readonly JsonSerializerOptions SerializerOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, }; /// @@ -35,7 +37,7 @@ public static class ZbHealthWriter /// /// The current HTTP context. Its is written to. /// The aggregated health report for the tier that ran. - public static Task WriteJsonAsync(HttpContext context, HealthReport report) + public static async Task WriteJsonAsync(HttpContext context, HealthReport report) { ArgumentNullException.ThrowIfNull(context); ArgumentNullException.ThrowIfNull(report); @@ -56,7 +58,7 @@ public static class ZbHealthWriter }), }; - return context.Response.WriteAsync(JsonSerializer.Serialize(payload, SerializerOptions)); + await JsonSerializer.SerializeAsync(context.Response.Body, payload, SerializerOptions, context.RequestAborted).ConfigureAwait(false); } private sealed class HealthReportDto diff --git a/ZB.MOM.WW.Health/tests/ZB.MOM.WW.Health.Tests/GrpcDependencyHealthCheckTests.cs b/ZB.MOM.WW.Health/tests/ZB.MOM.WW.Health.Tests/GrpcDependencyHealthCheckTests.cs index d1dd117..33769e4 100644 --- a/ZB.MOM.WW.Health/tests/ZB.MOM.WW.Health.Tests/GrpcDependencyHealthCheckTests.cs +++ b/ZB.MOM.WW.Health/tests/ZB.MOM.WW.Health.Tests/GrpcDependencyHealthCheckTests.cs @@ -14,14 +14,15 @@ public sealed class GrpcDependencyHealthCheckTests { private static readonly GrpcChannel Channel = GrpcChannel.ForAddress("http://localhost"); - private static async Task RunAsync(GrpcDependencyOptions options) + private static async Task RunAsync( + GrpcDependencyOptions options, CancellationToken cancellationToken = default) { var check = new GrpcDependencyHealthCheck(Channel, options); var context = new HealthCheckContext { Registration = new HealthCheckRegistration("grpc-dep", check, HealthStatus.Unhealthy, tags: null), }; - return await check.CheckHealthAsync(context, CancellationToken.None); + return await check.CheckHealthAsync(context, cancellationToken); } [Fact] @@ -69,4 +70,38 @@ public sealed class GrpcDependencyHealthCheckTests Assert.Equal(HealthStatus.Unhealthy, result.Status); Assert.Contains("mxaccessgw worker", result.Description); } + + [Fact] + public async Task ProbeExceedsTimeout_Unhealthy() + { + var result = await RunAsync(new GrpcDependencyOptions + { + Timeout = TimeSpan.FromMilliseconds(50), + Probe = static async (_, ct) => + { + await Task.Delay(Timeout.Infinite, ct); + return true; + }, + }); + + Assert.Equal(HealthStatus.Unhealthy, result.Status); + } + + [Fact] + public async Task ExternalCancellation_Throws() + { + using var cts = new CancellationTokenSource(); + await cts.CancelAsync(); + + await Assert.ThrowsAnyAsync(() => RunAsync( + new GrpcDependencyOptions + { + Probe = static async (_, ct) => + { + await Task.Delay(Timeout.Infinite, ct); + return true; + }, + }, + cts.Token)); + } } diff --git a/components/health/shared-contract/ZB.MOM.WW.Health.md b/components/health/shared-contract/ZB.MOM.WW.Health.md index 0f81a35..e786e78 100644 --- a/components/health/shared-contract/ZB.MOM.WW.Health.md +++ b/components/health/shared-contract/ZB.MOM.WW.Health.md @@ -73,7 +73,7 @@ public static class ZbHealthEndpointExtensions Action configure); } -/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, duration } } }. +/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, durationMs } } }. public static class ZbHealthWriter { public static Task WriteJsonAsync(HttpContext context, HealthReport report); diff --git a/components/health/spec/SPEC.md b/components/health/spec/SPEC.md index 93224e0..b956c04 100644 --- a/components/health/spec/SPEC.md +++ b/components/health/spec/SPEC.md @@ -121,12 +121,12 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr "database": { "status": "Healthy", "description": "SQL Server reachable", - "duration": "00:00:00.0120000" + "durationMs": 12 }, "akka-cluster": { "status": "Healthy", "description": "Member status: Up", - "duration": "00:00:00.0001000" + "durationMs": 0.1 } } } @@ -141,7 +141,7 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr | `entries` | object | Keyed by check registration name | | `entries..status` | string | Per-check status | | `entries..description` | string? | Human-readable detail (may be null) | -| `entries..duration` | string | TimeSpan `ToString()` — per-check elapsed time | +| `entries..durationMs` | number | Per-check elapsed time, milliseconds | The writer is exposed as a static `Task WriteJsonAsync(HttpContext, HealthReport)` so consumers can plug it into `MapHealthChecks` options and also call it from custom endpoints.