feat(health): gRPC dependency health check
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
using Grpc.Core;
|
||||
using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.Health;
|
||||
|
||||
/// <summary>
/// Health check that verifies a downstream gRPC dependency is reachable over its
/// <see cref="GrpcChannel"/>.
/// </summary>
/// <remarks>
/// <para>
/// The probe is injectable via <see cref="GrpcDependencyOptions.Probe"/>; the default drives the
/// channel to a connected state with <see cref="GrpcChannel.ConnectAsync"/>. The result is
/// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>. When the probe returns
/// <c>false</c>, throws an <see cref="RpcException"/>, or does not complete within
/// <see cref="GrpcDependencyOptions.Timeout"/>, the result carries the registration's
/// <see cref="HealthCheckRegistration.FailureStatus"/>, which is
/// <see cref="HealthStatus.Unhealthy"/> unless the registrant chose otherwise.
/// </para>
/// <para>
/// Cancellation requested through the caller's token is not turned into a result: the
/// <see cref="OperationCanceledException"/> propagates. Any exception other than
/// <see cref="RpcException"/> thrown by the probe propagates as well.
/// </para>
/// <para>
/// Recommended registration tags: <see cref="ZbHealthTags.Ready"/> and
/// <see cref="ZbHealthTags.Active"/> — a missing downstream gRPC dependency makes the node both
/// not-ready and not-able-to-act. The registrant applies the tags.
/// </para>
/// </remarks>
public sealed class GrpcDependencyHealthCheck : IHealthCheck
{
    private readonly GrpcChannel _channel;
    private readonly GrpcDependencyOptions _options;

    /// <summary>Initializes a new <see cref="GrpcDependencyHealthCheck"/>.</summary>
    /// <param name="channel">The gRPC channel to the downstream dependency.</param>
    /// <param name="options">
    /// Probe, dependency name, and timeout. When <c>null</c>, defaults are used (the default probe is
    /// <see cref="GrpcChannel.ConnectAsync"/> with a 5 s timeout).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="channel"/> is <c>null</c>.</exception>
    public GrpcDependencyHealthCheck(GrpcChannel channel, GrpcDependencyOptions? options = null)
    {
        _channel = channel ?? throw new ArgumentNullException(nameof(channel));
        _options = options ?? new GrpcDependencyOptions();
    }

    /// <summary>
    /// Runs the configured probe against the channel, bounded by
    /// <see cref="GrpcDependencyOptions.Timeout"/>, and maps the outcome to a health result.
    /// </summary>
    /// <param name="context">
    /// The health check context; its registration supplies the status reported on failure.
    /// </param>
    /// <param name="cancellationToken">Token that cancels the check on behalf of the caller.</param>
    /// <returns>
    /// A healthy result when the probe returns <c>true</c>; otherwise a result with the
    /// registration's failure status describing why the dependency is considered unreachable.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled while the probe was running.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var name = _options.DependencyName ?? "gRPC dependency";
        var probe = _options.Probe ?? DefaultProbeAsync;

        // Read the timeout once: the options object is mutable, and the deadline that is
        // armed below must be the same value that the timeout message reports.
        var timeout = _options.Timeout;

        // Linked source: fires on caller cancellation or when the timeout elapses.
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        timeoutCts.CancelAfter(timeout);

        try
        {
            var reachable = await probe(_channel, timeoutCts.Token).ConfigureAwait(false);
            return reachable
                ? HealthCheckResult.Healthy($"{name} is reachable.")
                : Failure(context, $"{name} is unreachable.");
        }
        catch (RpcException ex)
        {
            return Failure(context, $"{name} probe failed: {ex.Status.StatusCode}.", ex);
        }
        catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
        {
            // Only the timeout fired; caller-initiated cancellation is excluded by the filter
            // and propagates to the caller instead of being reported as a failure.
            return Failure(context, $"{name} probe timed out after {timeout}.", ex);
        }
    }

    /// <summary>
    /// Builds a failure result that honours the status the registrant configured for this check.
    /// Falls back to <see cref="HealthStatus.Unhealthy"/> when the context carries no registration
    /// (for example when the check is invoked directly rather than by the health check service).
    /// </summary>
    private static HealthCheckResult Failure(HealthCheckContext context, string description, Exception? exception = null)
    {
        var status = context?.Registration?.FailureStatus ?? HealthStatus.Unhealthy;
        return new HealthCheckResult(status, description, exception);
    }

    /// <summary>
    /// Default probe: awaits <see cref="GrpcChannel.ConnectAsync"/> and returns <c>true</c> once it
    /// completes. Failures and cancellation surface as exceptions for the caller to map.
    /// </summary>
    private static async Task<bool> DefaultProbeAsync(GrpcChannel channel, CancellationToken cancellationToken)
    {
        await channel.ConnectAsync(cancellationToken).ConfigureAwait(false);
        return true;
    }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
using Grpc.Net.Client;
|
||||
|
||||
namespace ZB.MOM.WW.Health;
|
||||
|
||||
/// <summary>
/// Configuration consumed by <see cref="GrpcDependencyHealthCheck"/>: what to call the
/// dependency, how long a probe may run, and how reachability is probed.
/// </summary>
public sealed class GrpcDependencyOptions
{
    /// <summary>
    /// Display name of the dependency. It is embedded in the description of the
    /// <c>HealthCheckResult</c>; when <c>null</c> the check falls back to a generic name.
    /// </summary>
    public string? DependencyName { get; set; }

    /// <summary>
    /// Upper bound on a single probe run; a probe still running after this long is reported
    /// as unreachable. Defaults to 5 seconds.
    /// </summary>
    public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Delegate that decides whether the dependency is reachable: <c>true</c> means reachable,
    /// <c>false</c> means unreachable. Leave <c>null</c> to use the built-in probe, which awaits
    /// <see cref="GrpcChannel.ConnectAsync"/> to bring the channel to the
    /// <see cref="Grpc.Core.ConnectivityState.Ready"/> state (throwing or being cancelled on
    /// failure). Supply a custom delegate for a deeper check, such as a
    /// <c>grpc.health.v1.Health/Check</c> RPC returning <c>SERVING</c>.
    /// </summary>
    public Func<GrpcChannel, CancellationToken, Task<bool>>? Probe { get; set; }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
using Grpc.Core;
|
||||
using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
|
||||
namespace ZB.MOM.WW.Health.Tests;
|
||||
|
||||
/// <summary>
/// Verifies <see cref="GrpcDependencyHealthCheck"/> via an injected probe (no live gRPC server):
/// probe-true → Healthy, probe-false → Unhealthy, an <see cref="RpcException"/> from the probe
/// → Unhealthy, and a probe that outlives the configured timeout → Unhealthy. The channel is
/// constructed but never dialled because the probe is stubbed.
/// </summary>
public sealed class GrpcDependencyHealthCheckTests
{
    // Shared across tests; every test stubs the probe, so the channel is only passed through.
    private static readonly GrpcChannel Channel = GrpcChannel.ForAddress("http://localhost");

    /// <summary>
    /// Runs the health check once with the given options, using a registration whose failure
    /// status is <see cref="HealthStatus.Unhealthy"/> and a token that is never cancelled.
    /// </summary>
    private static async Task<HealthCheckResult> RunAsync(GrpcDependencyOptions options)
    {
        var check = new GrpcDependencyHealthCheck(Channel, options);
        var context = new HealthCheckContext
        {
            Registration = new HealthCheckRegistration("grpc-dep", check, HealthStatus.Unhealthy, tags: null),
        };

        return await check.CheckHealthAsync(context, CancellationToken.None);
    }

    [Fact]
    public async Task ProbeReturnsTrue_Healthy()
    {
        var result = await RunAsync(new GrpcDependencyOptions
        {
            Probe = static (_, _) => Task.FromResult(true),
        });

        Assert.Equal(HealthStatus.Healthy, result.Status);
    }

    [Fact]
    public async Task ProbeReturnsFalse_Unhealthy()
    {
        var result = await RunAsync(new GrpcDependencyOptions
        {
            Probe = static (_, _) => Task.FromResult(false),
        });

        Assert.Equal(HealthStatus.Unhealthy, result.Status);
    }

    [Fact]
    public async Task ProbeThrowsRpcException_Unhealthy()
    {
        var result = await RunAsync(new GrpcDependencyOptions
        {
            Probe = static (_, _) => throw new RpcException(new Status(StatusCode.Unavailable, "down")),
        });

        Assert.Equal(HealthStatus.Unhealthy, result.Status);
    }

    [Fact]
    public async Task ProbeExceedsTimeout_Unhealthy()
    {
        var result = await RunAsync(new GrpcDependencyOptions
        {
            Timeout = TimeSpan.FromMilliseconds(1),

            // Never completes on its own; it only ends when the check's timeout token fires,
            // so the test does not depend on a real delay racing the deadline.
            Probe = static async (_, cancellationToken) =>
            {
                await Task.Delay(System.Threading.Timeout.InfiniteTimeSpan, cancellationToken);
                return true;
            },
        });

        Assert.Equal(HealthStatus.Unhealthy, result.Status);
        Assert.Contains("timed out", result.Description);
    }

    [Fact]
    public async Task DependencyName_AppearsInDescription()
    {
        var result = await RunAsync(new GrpcDependencyOptions
        {
            DependencyName = "mxaccessgw worker",
            Probe = static (_, _) => Task.FromResult(false),
        });

        Assert.Equal(HealthStatus.Unhealthy, result.Status);
        Assert.Contains("mxaccessgw worker", result.Description);
    }
}
|
||||
Reference in New Issue
Block a user