feat(health): gRPC dependency health check
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
using Grpc.Core;
|
||||
using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.Health;
|
||||
|
||||
/// <summary>
/// An <see cref="IHealthCheck"/> that reports whether a downstream gRPC service can be reached
/// through a supplied <see cref="GrpcChannel"/>.
/// </summary>
/// <remarks>
/// <para>
/// Every check runs a probe against the channel under a deadline of
/// <see cref="GrpcDependencyOptions.Timeout"/>. The probe is taken from
/// <see cref="GrpcDependencyOptions.Probe"/>; when none is configured the check awaits
/// <see cref="GrpcChannel.ConnectAsync"/> and treats its completion as success.
/// </para>
/// <list type="bullet">
/// <item><description>
/// Probe returns <c>true</c>: <see cref="HealthStatus.Healthy"/>.
/// </description></item>
/// <item><description>
/// Probe returns <c>false</c>: <see cref="HealthStatus.Unhealthy"/>.
/// </description></item>
/// <item><description>
/// Probe throws <see cref="RpcException"/>: <see cref="HealthStatus.Unhealthy"/>, with the gRPC
/// status code in the description and the exception attached.
/// </description></item>
/// <item><description>
/// Probe is cancelled because the timeout elapsed: <see cref="HealthStatus.Unhealthy"/>, with the
/// cancellation exception attached.
/// </description></item>
/// </list>
/// <para>
/// Cancellation requested through the caller's token is not turned into a result: the
/// <see cref="OperationCanceledException"/> propagates. Any other exception type thrown by the probe
/// propagates as well.
/// </para>
/// <para>
/// Recommended registration tags: <see cref="ZbHealthTags.Ready"/> and
/// <see cref="ZbHealthTags.Active"/> — a missing downstream gRPC dependency makes the node both
/// not-ready and not-able-to-act. This type does not tag itself; the registrant applies the tags.
/// </para>
/// </remarks>
public sealed class GrpcDependencyHealthCheck : IHealthCheck
{
    private readonly GrpcChannel _target;
    private readonly GrpcDependencyOptions _settings;

    /// <summary>Creates a health check bound to <paramref name="channel"/>.</summary>
    /// <param name="channel">The gRPC channel to the downstream dependency.</param>
    /// <param name="options">
    /// Probe, dependency name, and timeout. Passing <c>null</c> substitutes a fresh
    /// <see cref="GrpcDependencyOptions"/>, i.e. the <see cref="GrpcChannel.ConnectAsync"/> probe
    /// with a 5 s timeout.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="channel"/> is <c>null</c>.</exception>
    public GrpcDependencyHealthCheck(GrpcChannel channel, GrpcDependencyOptions? options = null)
    {
        if (channel is null)
        {
            throw new ArgumentNullException(nameof(channel));
        }

        _target = channel;
        _settings = options ?? new GrpcDependencyOptions();
    }

    /// <inheritdoc />
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var label = _settings.DependencyName ?? "gRPC dependency";
        var runProbe = _settings.Probe ?? ConnectProbeAsync;

        // Linked source: fires on caller cancellation or once the configured timeout elapses.
        using (var deadline = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken))
        {
            deadline.CancelAfter(_settings.Timeout);

            // True only when the deadline fired while the caller's own token is still live,
            // which is what separates a timeout from a caller-requested cancellation.
            bool DeadlineElapsed() =>
                deadline.IsCancellationRequested && !cancellationToken.IsCancellationRequested;

            try
            {
                if (await runProbe(_target, deadline.Token).ConfigureAwait(false))
                {
                    return HealthCheckResult.Healthy($"{label} is reachable.");
                }

                return HealthCheckResult.Unhealthy($"{label} is unreachable.");
            }
            catch (OperationCanceledException timeout) when (DeadlineElapsed())
            {
                return HealthCheckResult.Unhealthy($"{label} probe timed out after {_settings.Timeout}.", timeout);
            }
            catch (RpcException failure)
            {
                return HealthCheckResult.Unhealthy($"{label} probe failed: {failure.Status.StatusCode}.", failure);
            }
        }
    }

    /// <summary>
    /// Fallback probe used when no custom probe is configured: awaits
    /// <see cref="GrpcChannel.ConnectAsync"/> and returns <c>true</c> once that call completes.
    /// It never returns <c>false</c>; failures and cancellation surface as exceptions for the
    /// caller to handle.
    /// </summary>
    private static async Task<bool> ConnectProbeAsync(GrpcChannel link, CancellationToken stopToken)
    {
        await link.ConnectAsync(stopToken).ConfigureAwait(false);
        return true;
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
using Grpc.Net.Client;
|
||||
|
||||
namespace ZB.MOM.WW.Health;
|
||||
|
||||
/// <summary>
/// Settings consumed by <see cref="GrpcDependencyHealthCheck"/>: which probe to run, how the
/// dependency is named in results, and how long a probe may take.
/// </summary>
public sealed class GrpcDependencyOptions
{
    /// <summary>Length, in seconds, of the out-of-the-box <see cref="Timeout"/>.</summary>
    private const int DefaultTimeoutSeconds = 5;

    /// <summary>
    /// Delegate that decides reachability. It receives the channel and a cancellation token and
    /// yields <c>true</c> for "reachable" or <c>false</c> for "unreachable".
    /// </summary>
    /// <remarks>
    /// Leave as <c>null</c> to use the built-in probe, <see cref="GrpcChannel.ConnectAsync"/>,
    /// which drives the channel to the <see cref="Grpc.Core.ConnectivityState.Ready"/> state (or
    /// throws / cancels on failure). Supply a delegate for a richer probe, e.g. a
    /// <c>grpc.health.v1.Health/Check</c> RPC returning <c>SERVING</c>.
    /// </remarks>
    public Func<GrpcChannel, CancellationToken, Task<bool>>? Probe { get; set; }

    /// <summary>
    /// Display name for the dependency; it appears in the <c>HealthCheckResult</c> description.
    /// </summary>
    public string? DependencyName { get; set; }

    /// <summary>
    /// Longest duration a probe is allowed to run before the dependency is treated as
    /// unreachable. Defaults to 5 s.
    /// </summary>
    public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(DefaultTimeoutSeconds);
}
|
||||
Reference in New Issue
Block a user