feat(health): gRPC dependency health check

This commit is contained in:
Joseph Doherty
2026-06-01 06:44:05 -04:00
parent 5b82d68ea9
commit 1ab2f32e8e
3 changed files with 178 additions and 0 deletions
@@ -0,0 +1,80 @@
using Grpc.Core;
using Grpc.Net.Client;
using Microsoft.Extensions.Diagnostics.HealthChecks;
namespace ZB.MOM.WW.Health;
/// <summary>
/// Health check that verifies a downstream gRPC dependency is reachable over its
/// <see cref="GrpcChannel"/>.
/// </summary>
/// <remarks>
/// <para>
/// The probe is injectable via <see cref="GrpcDependencyOptions.Probe"/>; the default drives the
/// channel to a connected state with <see cref="GrpcChannel.ConnectAsync"/>. The result is
/// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>. When the probe returns
/// <c>false</c>, throws an <see cref="RpcException"/>, or is cancelled by the
/// <see cref="GrpcDependencyOptions.Timeout"/> deadline, the result carries the registration's
/// <see cref="HealthCheckRegistration.FailureStatus"/>, falling back to
/// <see cref="HealthStatus.Unhealthy"/> when no registration is attached to the context.
/// </para>
/// <para>
/// An <see cref="OperationCanceledException"/> observed while the caller's own token is cancelled
/// is not translated into a result; it propagates, as does any exception type other than
/// <see cref="RpcException"/>.
/// </para>
/// <para>
/// Recommended registration tags: <see cref="ZbHealthTags.Ready"/> and
/// <see cref="ZbHealthTags.Active"/> — a missing downstream gRPC dependency makes the node both
/// not-ready and not-able-to-act. The registrant applies the tags.
/// </para>
/// </remarks>
public sealed class GrpcDependencyHealthCheck : IHealthCheck
{
    private readonly GrpcChannel _channel;
    private readonly GrpcDependencyOptions _options;

    /// <summary>Initializes a new <see cref="GrpcDependencyHealthCheck"/>.</summary>
    /// <param name="channel">The gRPC channel to the downstream dependency.</param>
    /// <param name="options">
    /// Probe, dependency name, and timeout. When <c>null</c>, defaults are used (the default probe is
    /// <see cref="GrpcChannel.ConnectAsync"/> with a 5 s timeout).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="channel"/> is <c>null</c>.</exception>
    public GrpcDependencyHealthCheck(GrpcChannel channel, GrpcDependencyOptions? options = null)
    {
        _channel = channel ?? throw new ArgumentNullException(nameof(channel));
        _options = options ?? new GrpcDependencyOptions();
    }

    /// <summary>
    /// Runs the configured probe against the channel, bounded by
    /// <see cref="GrpcDependencyOptions.Timeout"/>, and maps the outcome to a
    /// <see cref="HealthCheckResult"/>.
    /// </summary>
    /// <param name="context">
    /// The health check context. Its registration supplies the status reported on failure.
    /// </param>
    /// <param name="cancellationToken">Token used by the caller to abandon the check.</param>
    /// <returns>
    /// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>; otherwise a result with
    /// the registration's failure status describing why the dependency is considered unreachable.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var name = _options.DependencyName ?? "gRPC dependency";
        var probe = _options.Probe ?? DefaultProbeAsync;

        // Honour the failure status chosen at registration instead of hard-coding Unhealthy.
        // The null-conditional chain keeps callers that pass a context without a registration
        // working exactly as before (Unhealthy).
        var failureStatus = context?.Registration?.FailureStatus ?? HealthStatus.Unhealthy;

        // Linked source: cancelled either by the caller or by the timeout, whichever comes first.
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        timeoutCts.CancelAfter(_options.Timeout);

        try
        {
            var reachable = await probe(_channel, timeoutCts.Token).ConfigureAwait(false);
            return reachable
                ? HealthCheckResult.Healthy($"{name} is reachable.")
                : new HealthCheckResult(failureStatus, $"{name} is unreachable.");
        }
        catch (RpcException ex)
        {
            return new HealthCheckResult(failureStatus, $"{name} probe failed: {ex.Status.StatusCode}.", ex);
        }
        catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested && !cancellationToken.IsCancellationRequested)
        {
            // Only the timeout fired: the caller did not cancel, so report a failure rather than
            // letting the cancellation escape.
            return new HealthCheckResult(failureStatus, $"{name} probe timed out after {_options.Timeout}.", ex);
        }
    }

    /// <summary>
    /// Default probe: awaits <see cref="GrpcChannel.ConnectAsync"/> with the supplied token and
    /// returns <c>true</c> once that call completes. It never returns <c>false</c>; a failure
    /// surfaces as an exception thrown by <see cref="GrpcChannel.ConnectAsync"/>.
    /// </summary>
    private static async Task<bool> DefaultProbeAsync(GrpcChannel channel, CancellationToken cancellationToken)
    {
        await channel.ConnectAsync(cancellationToken).ConfigureAwait(false);
        return true;
    }
}
@@ -0,0 +1,26 @@
using Grpc.Net.Client;
namespace ZB.MOM.WW.Health;
/// <summary>
/// Settings consumed by <see cref="GrpcDependencyHealthCheck"/>: the label used in result
/// descriptions, the probe deadline, and an optional custom reachability probe.
/// </summary>
public sealed class GrpcDependencyOptions
{
    /// <summary>
    /// Human-readable name of the dependency, surfaced in the <c>HealthCheckResult</c> description.
    /// When <c>null</c>, the health check substitutes a generic label.
    /// </summary>
    public string? DependencyName { get; set; }

    /// <summary>
    /// Upper bound on how long the probe may run before the health check cancels it and reports a
    /// timeout. Defaults to 5 seconds.
    /// </summary>
    public TimeSpan Timeout { get; set; } = new TimeSpan(hours: 0, minutes: 0, seconds: 5);

    /// <summary>
    /// Custom reachability probe. It receives the channel and a cancellation token, and returns
    /// <c>true</c> when the dependency is reachable or <c>false</c> when it is not. Leave as
    /// <c>null</c> to use the default probe, <see cref="GrpcChannel.ConnectAsync"/>, which drives
    /// the channel to the <see cref="Grpc.Core.ConnectivityState.Ready"/> state (or throws /
    /// cancels on failure). Supply a delegate for a richer check, e.g. a
    /// <c>grpc.health.v1.Health/Check</c> RPC returning <c>SERVING</c>.
    /// </summary>
    public Func<GrpcChannel, CancellationToken, Task<bool>>? Probe { get; set; }
}
@@ -0,0 +1,72 @@
using Grpc.Core;
using Grpc.Net.Client;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.Health;
namespace ZB.MOM.WW.Health.Tests;
/// <summary>
/// Exercises <see cref="GrpcDependencyHealthCheck"/> with stubbed probes, so no live gRPC server is
/// required. Covered outcomes: a probe returning <c>true</c> yields Healthy, a probe returning
/// <c>false</c> yields Unhealthy, a probe throwing <see cref="RpcException"/> yields Unhealthy, and
/// the configured dependency name shows up in the result description. The shared channel is
/// created but never dialled because every test replaces the probe.
/// </summary>
public sealed class GrpcDependencyHealthCheckTests
{
    // One channel for all tests; the stubbed probes never touch it.
    private static readonly GrpcChannel SharedChannel = GrpcChannel.ForAddress("http://localhost");

    /// <summary>
    /// Builds a health check around the shared channel with the given options, registers it with
    /// an Unhealthy failure status, and executes it once without cancellation.
    /// </summary>
    private static async Task<HealthCheckResult> RunAsync(GrpcDependencyOptions options)
    {
        var sut = new GrpcDependencyHealthCheck(SharedChannel, options);
        var registration = new HealthCheckRegistration("grpc-dep", sut, HealthStatus.Unhealthy, tags: null);
        var context = new HealthCheckContext { Registration = registration };

        return await sut.CheckHealthAsync(context, CancellationToken.None);
    }

    [Fact]
    public async Task ProbeReturnsTrue_Healthy()
    {
        var options = new GrpcDependencyOptions { Probe = static (_, _) => Task.FromResult(true) };

        var outcome = await RunAsync(options);

        Assert.Equal(HealthStatus.Healthy, outcome.Status);
    }

    [Fact]
    public async Task ProbeReturnsFalse_Unhealthy()
    {
        var options = new GrpcDependencyOptions { Probe = static (_, _) => Task.FromResult(false) };

        var outcome = await RunAsync(options);

        Assert.Equal(HealthStatus.Unhealthy, outcome.Status);
    }

    [Fact]
    public async Task ProbeThrowsRpcException_Unhealthy()
    {
        // The probe throws synchronously, before any task is produced.
        var options = new GrpcDependencyOptions
        {
            Probe = static (_, _) => throw new RpcException(new Status(StatusCode.Unavailable, "down")),
        };

        var outcome = await RunAsync(options);

        Assert.Equal(HealthStatus.Unhealthy, outcome.Status);
    }

    [Fact]
    public async Task DependencyName_AppearsInDescription()
    {
        var options = new GrpcDependencyOptions
        {
            Probe = static (_, _) => Task.FromResult(false),
            DependencyName = "mxaccessgw worker",
        };

        var outcome = await RunAsync(options);

        Assert.Equal(HealthStatus.Unhealthy, outcome.Status);
        Assert.Contains("mxaccessgw worker", outcome.Description);
    }
}