Fix all baseline code-review findings across the six shared libraries

Resolves the 35 findings from the 2026-06-01 baseline (commit 26ba1c7),
test-first for every behavioral change. +51 tests (331 -> 382 passing, 0 failed).
- Telemetry-001 (HIGH): RedactionEnricher now honours property removal, so a
redactor that drops a key actually scrubs the secret from the event.
- Auth: LDAP validator ValidateOnStart; API-key verify no longer fails on a
best-effort MarkUsed write or a corrupt scopes column (fail-closed); LDAP cert
validation hook; KeyPrefix persistence aligned; README algorithm corrected.
- Health: Akka checks return Degraded (not throw) when the cluster isn't up yet;
GrpcDependencyHealthCheck catch-all; null 'description' rendered; composite
endpoint builder; XML docs shipped.
- Audit: CompositeAuditWriter no longer re-throws OperationCanceledException;
TruncatingAuditRedactor over-redact scrubs Target + safe negative max; options
record; XML docs shipped.
- Configuration: TryAddEnumerable idempotent registration; consistent port
quoting; strict invariant port parsing; XML docs + README packaged.
- Theme: mobile toggle is now CSS-only (no Bootstrap JS); token/CSS hygiene;
XML docs on the public parameter surface.
Shared-contract/spec docs updated where the code was the source of truth
(observability service.instance.id, MapZbMetrics, redactor reach). All changes
are additive/back-compatible at v0.1.0. The code-reviews bookkeeping follows
separately.
This commit is contained in:
@@ -7,6 +7,11 @@
|
||||
<LangVersion>latest</LangVersion>
|
||||
<Version>0.1.0</Version>
|
||||
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
|
||||
<!-- Emit XML docs so the public API summaries ship inside the packed nupkgs (IntelliSense for
|
||||
consumers). CS1591 (missing doc on a public member) is suppressed so undocumented test /
|
||||
non-packed members do not break the build; the src public surface is fully documented. -->
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -103,27 +103,41 @@ public sealed class ActiveNodeHealthCheck : IHealthCheck
|
||||
if (system is null)
|
||||
return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
|
||||
|
||||
var cluster = Cluster.Get(system);
|
||||
var self = cluster.SelfMember;
|
||||
var selfUp = self.Status == MemberStatus.Up;
|
||||
|
||||
MemberStatus selfStatus;
|
||||
bool selfUp;
|
||||
bool hasRole;
|
||||
bool isLeader;
|
||||
if (_role is null)
|
||||
try
|
||||
{
|
||||
hasRole = false;
|
||||
var leader = cluster.State.Leader;
|
||||
isLeader = leader is not null && leader == self.Address;
|
||||
// Reading cluster membership can throw while the ActorSystem exists but the cluster has
|
||||
// not finished initialising (e.g. Akka.Cluster not yet configured →
|
||||
// ConfigurationException). The spec's startup-safety rule maps this to Degraded rather
|
||||
// than letting the exception escape (which the host would record as Unhealthy).
|
||||
var cluster = Cluster.Get(system);
|
||||
var self = cluster.SelfMember;
|
||||
selfStatus = self.Status;
|
||||
selfUp = selfStatus == MemberStatus.Up;
|
||||
|
||||
if (_role is null)
|
||||
{
|
||||
hasRole = false;
|
||||
var leader = cluster.State.Leader;
|
||||
isLeader = leader is not null && leader == self.Address;
|
||||
}
|
||||
else
|
||||
{
|
||||
hasRole = self.HasRole(_role);
|
||||
var roleLeader = cluster.State.RoleLeader(_role);
|
||||
isLeader = roleLeader is not null && roleLeader == self.Address;
|
||||
}
|
||||
}
|
||||
else
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
hasRole = self.HasRole(_role);
|
||||
var roleLeader = cluster.State.RoleLeader(_role);
|
||||
isLeader = roleLeader is not null && roleLeader == self.Address;
|
||||
return Task.FromResult(HealthCheckResult.Degraded("Akka cluster state not yet accessible.", ex));
|
||||
}
|
||||
|
||||
var health = ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, _role);
|
||||
var description = DescribeResult(health, self.Status, selfUp, isLeader);
|
||||
var description = DescribeResult(health, selfStatus, selfUp, isLeader);
|
||||
var result = health switch
|
||||
{
|
||||
HealthStatus.Healthy => HealthCheckResult.Healthy(description),
|
||||
|
||||
@@ -8,7 +8,8 @@ namespace ZB.MOM.WW.Health.Akka;
|
||||
/// <summary>
|
||||
/// Health check that maps the local node's Akka cluster membership status to a
|
||||
/// <see cref="HealthStatus"/> through a configurable <see cref="AkkaClusterStatusPolicy"/>.
|
||||
/// Register to the <see cref="ZbHealthTags.Ready"/> tag (recommended <c>[ready, active]</c>).
|
||||
/// Register to the <see cref="ZbHealthTags.Ready"/> tag only — cluster membership is a readiness
|
||||
/// concern; the <see cref="ZbHealthTags.Active"/> tier is reserved for the leader / active-node probe.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The <see cref="ActorSystem"/> is resolved lazily from the service provider. If it is not yet
|
||||
@@ -42,7 +43,21 @@ public sealed class AkkaClusterHealthCheck : IHealthCheck
|
||||
if (system is null)
|
||||
return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
|
||||
|
||||
var status = Cluster.Get(system).SelfMember.Status;
|
||||
MemberStatus status;
|
||||
try
|
||||
{
|
||||
// Cluster.Get(system).SelfMember can throw while the ActorSystem exists but the cluster
|
||||
// has not finished initialising (e.g. Akka.Cluster not yet configured →
|
||||
// ConfigurationException). The spec's startup-safety rule maps this to Degraded, not an
|
||||
// escaping exception (which the host would record as Unhealthy and pull the node from
|
||||
// rotation).
|
||||
status = Cluster.Get(system).SelfMember.Status;
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
return Task.FromResult(HealthCheckResult.Degraded("Akka cluster state not yet accessible.", ex));
|
||||
}
|
||||
|
||||
var health = _policy.Evaluate(status);
|
||||
var description = $"Akka cluster member status: {status}";
|
||||
var result = health switch
|
||||
|
||||
@@ -13,14 +13,15 @@ namespace ZB.MOM.WW.Health;
|
||||
/// The probe is injectable via <see cref="GrpcDependencyOptions.Probe"/>; the default drives the
|
||||
/// channel to a connected state with <see cref="GrpcChannel.ConnectAsync"/>. The result is
|
||||
/// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>, and
|
||||
/// <see cref="HealthStatus.Unhealthy"/> when it returns <c>false</c>, throws an
|
||||
/// <see cref="RpcException"/>, or times out / is cancelled within
|
||||
/// <see cref="GrpcDependencyOptions.Timeout"/>.
|
||||
/// <see cref="HealthStatus.Unhealthy"/> when it returns <c>false</c>, throws any exception
|
||||
/// (<see cref="RpcException"/> or otherwise), or times out within
|
||||
/// <see cref="GrpcDependencyOptions.Timeout"/>. External cancellation of the supplied
|
||||
/// <see cref="CancellationToken"/> propagates as an <see cref="OperationCanceledException"/>.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Recommended registration tags: <see cref="ZbHealthTags.Ready"/> and
|
||||
/// <see cref="ZbHealthTags.Active"/> — a missing downstream gRPC dependency makes the node both
|
||||
/// not-ready and not-able-to-act. The registrant applies the tags.
|
||||
/// Recommended registration tag: <see cref="ZbHealthTags.Ready"/> only — downstream gRPC
|
||||
/// reachability is a readiness concern; the <see cref="ZbHealthTags.Active"/> tier is reserved for
|
||||
/// the leader / active-node probe. The registrant applies the tag.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class GrpcDependencyHealthCheck : IHealthCheck
|
||||
@@ -74,6 +75,15 @@ public sealed class GrpcDependencyHealthCheck : IHealthCheck
|
||||
{
|
||||
return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Catch-all to match the sibling DatabaseHealthCheck: any other probe error
|
||||
// (e.g. InvalidOperationException / HttpRequestException / SocketException from the
|
||||
// transport, or anything a custom probe throws) maps to Unhealthy rather than escaping
|
||||
// the IHealthCheck boundary. The OCE/Rpc external-cancellation handlers above run first,
|
||||
// so caller cancellation still propagates.
|
||||
return HealthCheckResult.Unhealthy($"{name} probe failed: {ex.Message}", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -28,9 +28,9 @@ public static class ZbHealthEndpointExtensions
|
||||
/// emits a minimal <c>200 OK</c> body.
|
||||
/// </remarks>
|
||||
/// <returns>
|
||||
/// The <see cref="IEndpointConventionBuilder"/> for the readiness (<c>/health/ready</c>) endpoint.
|
||||
/// A single tier is returned (rather than a composite) to keep the API simple; conventions
|
||||
/// applied to the result affect only the readiness endpoint.
|
||||
/// A composite <see cref="IEndpointConventionBuilder"/> that fans every chained convention out to
|
||||
/// <em>all three</em> health endpoints (readiness, active, and liveness). For example,
|
||||
/// <c>endpoints.MapZbHealth().RequireHost("…")</c> gates all three endpoints, as a caller expects.
|
||||
/// </returns>
|
||||
public static IEndpointConventionBuilder MapZbHealth(
|
||||
this IEndpointRouteBuilder endpoints,
|
||||
@@ -47,7 +47,7 @@ public static class ZbHealthEndpointExtensions
|
||||
ResponseWriter = responseWriter,
|
||||
}).AllowAnonymous();
|
||||
|
||||
endpoints.MapHealthChecks(options.ActivePath, new HealthCheckOptions
|
||||
var active = endpoints.MapHealthChecks(options.ActivePath, new HealthCheckOptions
|
||||
{
|
||||
Predicate = static c => c.Tags.Contains(ZbHealthTags.Active),
|
||||
ResponseWriter = responseWriter,
|
||||
@@ -56,12 +56,38 @@ public static class ZbHealthEndpointExtensions
|
||||
// Liveness: run no checks. The endpoint returns 200 as long as the process can respond.
|
||||
// No JSON writer — the empty report would carry no useful data, so the framework default
|
||||
// (a minimal plain-text body) is sufficient.
|
||||
endpoints.MapHealthChecks(options.LivePath, new HealthCheckOptions
|
||||
var live = endpoints.MapHealthChecks(options.LivePath, new HealthCheckOptions
|
||||
{
|
||||
Predicate = static _ => false,
|
||||
}).AllowAnonymous();
|
||||
|
||||
return ready;
|
||||
return new CompositeEndpointConventionBuilder(ready, active, live);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// An <see cref="IEndpointConventionBuilder"/> that forwards each convention to several
|
||||
/// underlying builders, so conventions chained onto the result of
|
||||
/// <see cref="MapZbHealth(IEndpointRouteBuilder, ZbHealthEndpointOptions?)"/> apply to all three
|
||||
/// health endpoints rather than just one.
|
||||
/// </summary>
|
||||
private sealed class CompositeEndpointConventionBuilder : IEndpointConventionBuilder
|
||||
{
|
||||
private readonly IEndpointConventionBuilder[] _builders;
|
||||
|
||||
public CompositeEndpointConventionBuilder(params IEndpointConventionBuilder[] builders) =>
|
||||
_builders = builders;
|
||||
|
||||
public void Add(Action<EndpointBuilder> convention)
|
||||
{
|
||||
foreach (var builder in _builders)
|
||||
builder.Add(convention);
|
||||
}
|
||||
|
||||
public void Finally(Action<EndpointBuilder> finalConvention)
|
||||
{
|
||||
foreach (var builder in _builders)
|
||||
builder.Finally(finalConvention);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -70,7 +96,10 @@ public static class ZbHealthEndpointExtensions
|
||||
/// </summary>
|
||||
/// <param name="endpoints">The endpoint route builder to map onto.</param>
|
||||
/// <param name="configure">Callback that mutates a fresh <see cref="ZbHealthEndpointOptions"/>.</param>
|
||||
/// <returns>The <see cref="IEndpointConventionBuilder"/> for the readiness endpoint.</returns>
|
||||
/// <returns>
|
||||
/// A composite <see cref="IEndpointConventionBuilder"/> that fans chained conventions out to all
|
||||
/// three health endpoints.
|
||||
/// </returns>
|
||||
public static IEndpointConventionBuilder MapZbHealth(
|
||||
this IEndpointRouteBuilder endpoints,
|
||||
Action<ZbHealthEndpointOptions> configure)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
@@ -21,15 +20,21 @@ namespace ZB.MOM.WW.Health;
|
||||
/// }
|
||||
/// }
|
||||
/// </code>
|
||||
/// The HTTP status code is left to the ASP.NET Core health-checks middleware (Healthy/Degraded → 200,
|
||||
/// Unhealthy → 503); this writer only renders the body and sets <c>Content-Type: application/json</c>.
|
||||
/// The <c>description</c> key is always present; when a check supplies no description it is emitted
|
||||
/// as JSON <c>null</c> (not omitted), matching the spec example and the <c>HealthChecks.UI.Client</c>
|
||||
/// shape. The HTTP status code is left to the ASP.NET Core health-checks middleware (Healthy/Degraded
|
||||
/// → 200, Unhealthy → 503); this writer only renders the body and sets
|
||||
/// <c>Content-Type: application/json</c>.
|
||||
/// </remarks>
|
||||
public static class ZbHealthWriter
|
||||
{
|
||||
// Null properties are emitted (not omitted) so a null `description` renders as
|
||||
// "description": null — matching the SPEC §3 example and the HealthChecks.UI.Client shape this
|
||||
// writer mirrors. Consumers can then read entries.<name>.description without handling a missing
|
||||
// property. (Do not set DefaultIgnoreCondition = WhenWritingNull here.)
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -86,6 +86,50 @@ public sealed class ActiveNodeDecisionTests
|
||||
Assert.False(gate.IsActiveNode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthCheck_RoleLess_ClusterInaccessible_ReturnsDegraded()
|
||||
{
|
||||
// ActorSystem present but Akka.Cluster not configured → Cluster.Get throws. The check must
|
||||
// return Degraded (startup-safety rule), not let the exception escape (→ Unhealthy).
|
||||
using var system = ActorSystem.Create("plain-no-cluster-roleless");
|
||||
try
|
||||
{
|
||||
var provider = new ServiceCollection()
|
||||
.AddSingleton(system)
|
||||
.BuildServiceProvider();
|
||||
var check = new ActiveNodeHealthCheck(provider);
|
||||
|
||||
var result = await check.CheckHealthAsync(NewContext(check));
|
||||
|
||||
Assert.Equal(HealthStatus.Degraded, result.Status);
|
||||
}
|
||||
finally
|
||||
{
|
||||
await system.Terminate();
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthCheck_RoleFiltered_ClusterInaccessible_ReturnsDegraded()
|
||||
{
|
||||
using var system = ActorSystem.Create("plain-no-cluster-rolefiltered");
|
||||
try
|
||||
{
|
||||
var provider = new ServiceCollection()
|
||||
.AddSingleton(system)
|
||||
.BuildServiceProvider();
|
||||
var check = new ActiveNodeHealthCheck(provider, "admin");
|
||||
|
||||
var result = await check.CheckHealthAsync(NewContext(check));
|
||||
|
||||
Assert.Equal(HealthStatus.Degraded, result.Status);
|
||||
}
|
||||
finally
|
||||
{
|
||||
await system.Terminate();
|
||||
}
|
||||
}
|
||||
|
||||
private static HealthCheckContext NewContext(IHealthCheck check) => new()
|
||||
{
|
||||
Registration = new HealthCheckRegistration("active-node", check, HealthStatus.Unhealthy, tags: null),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
@@ -70,6 +71,30 @@ public sealed class AkkaClusterStatusPolicyTests
|
||||
Assert.Equal(HealthStatus.Degraded, result.Status);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthCheck_ActorSystemPresentButClusterInaccessible_ReturnsDegraded()
|
||||
{
|
||||
// A plain (non-clustered) ActorSystem exists in DI, but Akka.Cluster is not configured,
|
||||
// so Cluster.Get(system) throws a ConfigurationException — the startup race the spec calls
|
||||
// out. The check must return Degraded, not let the exception escape (→ Unhealthy via the host).
|
||||
using var system = ActorSystem.Create("plain-no-cluster");
|
||||
try
|
||||
{
|
||||
var provider = new ServiceCollection()
|
||||
.AddSingleton(system)
|
||||
.BuildServiceProvider();
|
||||
var check = new AkkaClusterHealthCheck(provider, AkkaClusterStatusPolicy.Default);
|
||||
|
||||
var result = await check.CheckHealthAsync(NewContext(check));
|
||||
|
||||
Assert.Equal(HealthStatus.Degraded, result.Status);
|
||||
}
|
||||
finally
|
||||
{
|
||||
await system.Terminate();
|
||||
}
|
||||
}
|
||||
|
||||
private static HealthCheckContext NewContext(IHealthCheck check) => new()
|
||||
{
|
||||
Registration = new HealthCheckRegistration("akka-cluster", check, HealthStatus.Unhealthy, tags: null),
|
||||
|
||||
@@ -71,6 +71,21 @@ public sealed class GrpcDependencyHealthCheckTests
|
||||
Assert.Contains("mxaccessgw worker", result.Description);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ProbeThrowsArbitraryException_Unhealthy()
|
||||
{
|
||||
// A non-RpcException / non-OperationCanceledException (e.g. the transport surfacing an
|
||||
// InvalidOperationException) must be caught and mapped to Unhealthy, not allowed to escape.
|
||||
var result = await RunAsync(new GrpcDependencyOptions
|
||||
{
|
||||
DependencyName = "mxaccessgw worker",
|
||||
Probe = static (_, _) => throw new InvalidOperationException("channel disposed"),
|
||||
});
|
||||
|
||||
Assert.Equal(HealthStatus.Unhealthy, result.Status);
|
||||
Assert.Contains("mxaccessgw worker", result.Description);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ProbeExceedsTimeout_Unhealthy()
|
||||
{
|
||||
|
||||
@@ -31,7 +31,7 @@ public sealed class ResponseWriterTests
|
||||
}
|
||||
|
||||
private static async Task<HttpResponseMessage> GetReadyAsync(
|
||||
HealthStatus status, string description = "db reachable")
|
||||
HealthStatus status, string? description = "db reachable")
|
||||
{
|
||||
var builder = WebApplication.CreateBuilder();
|
||||
builder.WebHost.UseTestServer();
|
||||
@@ -66,6 +66,24 @@ public sealed class ResponseWriterTests
|
||||
Assert.Equal("db reachable", db.GetProperty("description").GetString());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ReadyEndpoint_NullDescription_EmitsDescriptionKeyAsNull()
|
||||
{
|
||||
// A check that produces no description must still emit the "description" key with a JSON null
|
||||
// value (matching the spec §3 example and the HealthChecks.UI.Client shape) rather than
|
||||
// dropping the key — so consumers can read entries.<name>.description without handling a
|
||||
// missing property.
|
||||
var response = await GetReadyAsync(HealthStatus.Healthy, description: null);
|
||||
|
||||
Assert.Equal(HttpStatusCode.OK, response.StatusCode);
|
||||
|
||||
using var doc = JsonDocument.Parse(await response.Content.ReadAsStringAsync());
|
||||
var db = doc.RootElement.GetProperty("entries").GetProperty("db");
|
||||
|
||||
Assert.True(db.TryGetProperty("description", out var description), "description key must be present");
|
||||
Assert.Equal(JsonValueKind.Null, description.ValueKind);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ReadyEndpoint_Degraded_Returns200_WithDegradedStatus()
|
||||
{
|
||||
|
||||
@@ -127,6 +127,31 @@ public sealed class TierMappingTests
|
||||
Assert.Equal(0, active.Invocations);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ChainedConvention_AppliesToAllThreeEndpoints()
|
||||
{
|
||||
// MapZbHealth returns a composite builder, so a convention chained onto its result
|
||||
// (.RequireHost) must gate all three endpoints — not just readiness. With a host filter
|
||||
// that does not match the default test-client host, every tier returns 404.
|
||||
var builder = WebApplication.CreateBuilder();
|
||||
builder.WebHost.UseTestServer();
|
||||
builder.Services.AddHealthChecks()
|
||||
.AddCheck("ready-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Ready })
|
||||
.AddCheck("active-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Active });
|
||||
|
||||
await using var app = builder.Build();
|
||||
app.MapZbHealth().RequireHost("health.internal");
|
||||
await app.StartAsync();
|
||||
|
||||
var client = app.GetTestClient();
|
||||
|
||||
// The default test host does not match "health.internal", so the convention removed every
|
||||
// endpoint from this host — confirming it fanned out to all three, not just readiness.
|
||||
Assert.Equal(HttpStatusCode.NotFound, (await client.GetAsync("/health/ready")).StatusCode);
|
||||
Assert.Equal(HttpStatusCode.NotFound, (await client.GetAsync("/health/active")).StatusCode);
|
||||
Assert.Equal(HttpStatusCode.NotFound, (await client.GetAsync("/healthz")).StatusCode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Options_OverrideRoutePaths()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user