feat(health): core review fixes (async writer, gRPC cancellation, validation, configurable retry-after)
This commit is contained in:
@@ -14,7 +14,19 @@ namespace ZB.MOM.WW.Health;
|
||||
public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter
|
||||
{
|
||||
/// <summary>Default <c>Retry-After</c> value (seconds) advertised on a standby 503 response.</summary>
|
||||
private const int RetryAfterSeconds = 5;
|
||||
private const int DefaultRetryAfterSeconds = 5;
|
||||
|
||||
private readonly int _retryAfterSeconds;
|
||||
|
||||
/// <summary>
/// Creates an <see cref="ActiveNodeGateEndpointFilter"/> that advertises the default
/// <c>Retry-After</c> of 5 seconds by delegating to the <see cref="int"/> overload.
/// </summary>
public ActiveNodeGateEndpointFilter() : this(DefaultRetryAfterSeconds) { }
|
||||
|
||||
/// <summary>Initializes a new <see cref="ActiveNodeGateEndpointFilter"/>.</summary>
/// <param name="retryAfterSeconds">
/// The <c>Retry-After</c> value (seconds) advertised on a standby 503 response. Must be zero or greater.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="retryAfterSeconds"/> is negative.
/// </exception>
public ActiveNodeGateEndpointFilter(int retryAfterSeconds)
{
    // The delay-seconds form of Retry-After is a non-negative integer; fail fast at
    // construction instead of emitting an invalid header on every standby response.
    if (retryAfterSeconds < 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(retryAfterSeconds), retryAfterSeconds, "Retry-After must be zero or greater.");
    }

    _retryAfterSeconds = retryAfterSeconds;
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns 503 (with <c>Retry-After</c>) when the resolved <see cref="IActiveNodeGate"/> reports
|
||||
@@ -34,7 +46,8 @@ public sealed class ActiveNodeGateEndpointFilter : IEndpointFilter
|
||||
|
||||
if (gate is { IsActiveNode: false })
|
||||
{
|
||||
httpContext.Response.Headers.RetryAfter = RetryAfterSeconds.ToString();
|
||||
httpContext.Response.Headers.RetryAfter =
|
||||
_retryAfterSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture);
|
||||
return Results.StatusCode(StatusCodes.Status503ServiceUnavailable);
|
||||
}
|
||||
|
||||
@@ -53,10 +66,15 @@ public static class ActiveNodeGateExtensions
|
||||
/// returns 503 with a <c>Retry-After</c> header when the node is a standby.
|
||||
/// </summary>
|
||||
/// <param name="builder">The endpoint convention builder to decorate.</param>
|
||||
/// <param name="retryAfterSeconds">
|
||||
/// The <c>Retry-After</c> value (seconds) advertised on a standby 503 response. Defaults to 5.
|
||||
/// </param>
|
||||
/// <returns>The same <paramref name="builder"/> for chaining.</returns>
|
||||
public static IEndpointConventionBuilder RequireActiveNode(this IEndpointConventionBuilder builder)
|
||||
public static IEndpointConventionBuilder RequireActiveNode(
|
||||
this IEndpointConventionBuilder builder,
|
||||
int retryAfterSeconds = 5)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter());
|
||||
return builder.AddEndpointFilter(new ActiveNodeGateEndpointFilter(retryAfterSeconds));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,11 +58,19 @@ public sealed class GrpcDependencyHealthCheck : IHealthCheck
|
||||
? HealthCheckResult.Healthy($"{name} is reachable.")
|
||||
: HealthCheckResult.Unhealthy($"{name} is unreachable.");
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (RpcException ex) when (ex.StatusCode == StatusCode.Cancelled && cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw new OperationCanceledException(cancellationToken);
|
||||
}
|
||||
catch (RpcException ex)
|
||||
{
|
||||
return HealthCheckResult.Unhealthy($"{name} probe failed: {ex.Status.StatusCode}.", ex);
|
||||
}
|
||||
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
|
||||
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested)
|
||||
{
|
||||
return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex);
|
||||
}
|
||||
|
||||
@@ -21,6 +21,21 @@ public sealed class GrpcDependencyOptions
|
||||
/// </summary>
|
||||
public string? DependencyName { get; set; }
|
||||
|
||||
private TimeSpan _timeout = TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <summary>Maximum time the probe may take before it is treated as unreachable. Default 5 s.</summary>
|
||||
public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5);
|
||||
/// <exception cref="ArgumentOutOfRangeException">Thrown when set to a value less than or equal to <see cref="TimeSpan.Zero"/>.</exception>
|
||||
public TimeSpan Timeout
{
    get => _timeout;

    // Accept only strictly positive durations; zero or negative values are rejected
    // before the backing field is modified.
    set => _timeout = value > TimeSpan.Zero
        ? value
        : throw new ArgumentOutOfRangeException(nameof(value), value, "Timeout must be greater than zero.");
}
|
||||
}
|
||||
|
||||
+4
-2
@@ -1,4 +1,5 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
@@ -28,6 +29,7 @@ public static class ZbHealthWriter
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
@@ -35,7 +37,7 @@ public static class ZbHealthWriter
|
||||
/// </summary>
|
||||
/// <param name="context">The current HTTP context. Its <see cref="HttpResponse"/> is written to.</param>
|
||||
/// <param name="report">The aggregated health report for the tier that ran.</param>
|
||||
public static Task WriteJsonAsync(HttpContext context, HealthReport report)
|
||||
public static async Task WriteJsonAsync(HttpContext context, HealthReport report)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentNullException.ThrowIfNull(report);
|
||||
@@ -56,7 +58,7 @@ public static class ZbHealthWriter
|
||||
}),
|
||||
};
|
||||
|
||||
return context.Response.WriteAsync(JsonSerializer.Serialize(payload, SerializerOptions));
|
||||
await JsonSerializer.SerializeAsync(context.Response.Body, payload, SerializerOptions, context.RequestAborted).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private sealed class HealthReportDto
|
||||
@@ -14,14 +14,15 @@ public sealed class GrpcDependencyHealthCheckTests
|
||||
{
|
||||
private static readonly GrpcChannel Channel = GrpcChannel.ForAddress("http://localhost");
|
||||
|
||||
private static async Task<HealthCheckResult> RunAsync(GrpcDependencyOptions options)
|
||||
private static async Task<HealthCheckResult> RunAsync(
|
||||
GrpcDependencyOptions options, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var check = new GrpcDependencyHealthCheck(Channel, options);
|
||||
var context = new HealthCheckContext
|
||||
{
|
||||
Registration = new HealthCheckRegistration("grpc-dep", check, HealthStatus.Unhealthy, tags: null),
|
||||
};
|
||||
return await check.CheckHealthAsync(context, CancellationToken.None);
|
||||
return await check.CheckHealthAsync(context, cancellationToken);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -69,4 +70,38 @@ public sealed class GrpcDependencyHealthCheckTests
|
||||
Assert.Equal(HealthStatus.Unhealthy, result.Status);
|
||||
Assert.Contains("mxaccessgw worker", result.Description);
|
||||
}
|
||||
|
||||
// A probe that never completes on its own must be reported as Unhealthy, and specifically
// through the timeout path rather than any other failure path.
[Fact]
public async Task ProbeExceedsTimeout_Unhealthy()
{
    var options = new GrpcDependencyOptions
    {
        Timeout = TimeSpan.FromMilliseconds(50),
        Probe = static async (_, ct) =>
        {
            // Infinite delay: only cancellation of ct can end this probe.
            await Task.Delay(Timeout.Infinite, ct);
            return true;
        },
    };

    var result = await RunAsync(options);

    Assert.Equal(HealthStatus.Unhealthy, result.Status);

    // Status alone cannot distinguish a timeout from a generic probe failure;
    // the timeout handler is the one that writes "timed out" into the description.
    Assert.Contains("timed out", result.Description);
}
|
||||
|
||||
// Cancellation requested by the caller must surface as an OperationCanceledException
// instead of being converted into a health result.
[Fact]
public async Task ExternalCancellation_Throws()
{
    // The token handed to the check is already cancelled before the probe runs.
    using var cancelledSource = new CancellationTokenSource();
    await cancelledSource.CancelAsync();

    var neverCompletes = new GrpcDependencyOptions
    {
        Probe = static async (_, ct) =>
        {
            await Task.Delay(Timeout.Infinite, ct);
            return true;
        },
    };

    await Assert.ThrowsAnyAsync<OperationCanceledException>(
        () => RunAsync(neverCompletes, cancelledSource.Token));
}
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ public static class ZbHealthEndpointExtensions
|
||||
Action<ZbHealthEndpointOptions> configure);
|
||||
}
|
||||
|
||||
/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, duration } } }.
|
||||
/// Canonical JSON response writer. Shape: { status, totalDurationMs, entries: { name: { status, description, durationMs } } }.
|
||||
public static class ZbHealthWriter
|
||||
{
|
||||
public static Task WriteJsonAsync(HttpContext context, HealthReport report);
|
||||
|
||||
@@ -121,12 +121,12 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr
|
||||
"database": {
|
||||
"status": "Healthy",
|
||||
"description": "SQL Server reachable",
|
||||
"duration": "00:00:00.0120000"
|
||||
"durationMs": 12
|
||||
},
|
||||
"akka-cluster": {
|
||||
"status": "Healthy",
|
||||
"description": "Member status: Up",
|
||||
"duration": "00:00:00.0001000"
|
||||
"durationMs": 0.1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -141,7 +141,7 @@ All health endpoints share one canonical JSON serializer. The shape is lifted fr
|
||||
| `entries` | object | Keyed by check registration name |
|
||||
| `entries.<name>.status` | string | Per-check status |
|
||||
| `entries.<name>.description` | string? | Human-readable detail (may be null) |
|
||||
| `entries.<name>.duration` | string | TimeSpan `ToString()` — per-check elapsed time |
|
||||
| `entries.<name>.durationMs` | number | Per-check elapsed time, milliseconds |
|
||||
|
||||
The writer is exposed as a static `Task WriteJsonAsync(HttpContext, HealthReport)` so consumers can
|
||||
plug it into `MapHealthChecks` options and also call it from custom endpoints.
|
||||
|
||||
Reference in New Issue
Block a user