Fix all baseline code-review findings across the six shared libraries

Resolves the 35 findings from the 2026-06-01 baseline (commit 26ba1c7),
test-first for every behavioral change. +51 tests (331 -> 382 passing, 0 failed).

- Telemetry-001 (HIGH): RedactionEnricher now honours property removal, so a
  redactor that drops a key actually scrubs the secret from the event.
- Auth: LDAP validator ValidateOnStart; API-key verify no longer fails on a
  best-effort MarkUsed write or a corrupt scopes column (fail-closed); LDAP cert
  validation hook; KeyPrefix persistence aligned; README algorithm corrected.
- Health: Akka checks return Degraded (not throw) when the cluster isn't up yet;
  GrpcDependencyHealthCheck catch-all; null 'description' rendered; composite
  endpoint builder; XML docs shipped.
- Audit: CompositeAuditWriter no longer re-throws OperationCanceledException;
  TruncatingAuditRedactor over-redact scrubs Target + safe negative max; options
  record; XML docs shipped.
- Configuration: TryAddEnumerable idempotent registration; consistent port
  quoting; strict invariant port parsing; XML docs + README packaged.
- Theme: mobile toggle is now CSS-only (no Bootstrap JS); token/CSS hygiene;
  XML docs on the public parameter surface.

Shared-contract/spec docs updated where the code was the source of truth
(observability service.instance.id, MapZbMetrics, redactor reach). All changes
additive/back-compatible at v0.1.0. code-reviews bookkeeping follows separately.
This commit is contained in:
Joseph Doherty
2026-06-01 11:22:14 -04:00
parent 26ba1c7215
commit 544a6ddb77
72 changed files with 1539 additions and 191 deletions
@@ -86,6 +86,50 @@ public sealed class ActiveNodeDecisionTests
Assert.False(gate.IsActiveNode);
}
[Fact]
public async Task HealthCheck_RoleLess_ClusterInaccessible_ReturnsDegraded()
{
    // ActorSystem present but Akka.Cluster not configured → Cluster.Get throws. The check must
    // return Degraded (startup-safety rule), not let the exception escape (→ Unhealthy).
    using var system = ActorSystem.Create("plain-no-cluster-roleless");
    try
    {
        // The container is scoped to the try block so it is disposed before the actor system
        // is terminated in the finally block. The ActorSystem is registered as a pre-built
        // instance, so its lifetime stays with this test, not with the container.
        await using var provider = new ServiceCollection()
            .AddSingleton(system)
            .BuildServiceProvider();
        var check = new ActiveNodeHealthCheck(provider);

        var result = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, result.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
[Fact]
public async Task HealthCheck_RoleFiltered_ClusterInaccessible_ReturnsDegraded()
{
    // Role-filtered variant (check constructed with "admin"): a plain ActorSystem is registered
    // but Akka.Cluster is not configured. The check must still report Degraded rather than let
    // an exception escape.
    using var system = ActorSystem.Create("plain-no-cluster-rolefiltered");
    try
    {
        // The container is scoped to the try block so it is disposed before the actor system
        // is terminated in the finally block. The ActorSystem is registered as a pre-built
        // instance, so its lifetime stays with this test, not with the container.
        await using var provider = new ServiceCollection()
            .AddSingleton(system)
            .BuildServiceProvider();
        var check = new ActiveNodeHealthCheck(provider, "admin");

        var result = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, result.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
private static HealthCheckContext NewContext(IHealthCheck check) => new()
{
Registration = new HealthCheckRegistration("active-node", check, HealthStatus.Unhealthy, tags: null),
@@ -1,3 +1,4 @@
using Akka.Actor;
using Akka.Cluster;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Diagnostics.HealthChecks;
@@ -70,6 +71,30 @@ public sealed class AkkaClusterStatusPolicyTests
Assert.Equal(HealthStatus.Degraded, result.Status);
}
[Fact]
public async Task HealthCheck_ActorSystemPresentButClusterInaccessible_ReturnsDegraded()
{
    // A plain (non-clustered) ActorSystem exists in DI, but Akka.Cluster is not configured,
    // so Cluster.Get(system) throws a ConfigurationException — the startup race the spec calls
    // out. The check must return Degraded, not let the exception escape (→ Unhealthy via the host).
    using var system = ActorSystem.Create("plain-no-cluster");
    try
    {
        // The container is scoped to the try block so it is disposed before the actor system
        // is terminated in the finally block. The ActorSystem is registered as a pre-built
        // instance, so its lifetime stays with this test, not with the container.
        await using var provider = new ServiceCollection()
            .AddSingleton(system)
            .BuildServiceProvider();
        var check = new AkkaClusterHealthCheck(provider, AkkaClusterStatusPolicy.Default);

        var result = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, result.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
private static HealthCheckContext NewContext(IHealthCheck check) => new()
{
Registration = new HealthCheckRegistration("akka-cluster", check, HealthStatus.Unhealthy, tags: null),
@@ -71,6 +71,21 @@ public sealed class GrpcDependencyHealthCheckTests
Assert.Contains("mxaccessgw worker", result.Description);
}
[Fact]
public async Task ProbeThrowsArbitraryException_Unhealthy()
{
    // Any exception other than RpcException / OperationCanceledException — for instance an
    // InvalidOperationException surfaced by the transport — has to be caught by the check and
    // reported as Unhealthy instead of propagating to the caller.
    const string dependencyName = "mxaccessgw worker";
    var options = new GrpcDependencyOptions
    {
        DependencyName = dependencyName,
        Probe = static (_, _) => throw new InvalidOperationException("channel disposed"),
    };

    var result = await RunAsync(options);

    Assert.Equal(HealthStatus.Unhealthy, result.Status);
    Assert.Contains(dependencyName, result.Description);
}
[Fact]
public async Task ProbeExceedsTimeout_Unhealthy()
{
@@ -31,7 +31,7 @@ public sealed class ResponseWriterTests
}
private static async Task<HttpResponseMessage> GetReadyAsync(
HealthStatus status, string description = "db reachable")
HealthStatus status, string? description = "db reachable")
{
var builder = WebApplication.CreateBuilder();
builder.WebHost.UseTestServer();
@@ -66,6 +66,24 @@ public sealed class ResponseWriterTests
Assert.Equal("db reachable", db.GetProperty("description").GetString());
}
[Fact]
public async Task ReadyEndpoint_NullDescription_EmitsDescriptionKeyAsNull()
{
    // A check that produces no description must still emit the "description" key with a JSON null
    // value (matching the spec §3 example and the HealthChecks.UI.Client shape) rather than
    // dropping the key — so consumers can read entries.<name>.description without handling a
    // missing property.
    // The response is disposed at the end of the test, like the JsonDocument below.
    using var response = await GetReadyAsync(HealthStatus.Healthy, description: null);
    Assert.Equal(HttpStatusCode.OK, response.StatusCode);

    var body = await response.Content.ReadAsStringAsync();
    using var doc = JsonDocument.Parse(body);
    var db = doc.RootElement.GetProperty("entries").GetProperty("db");

    Assert.True(db.TryGetProperty("description", out var description), "description key must be present");
    Assert.Equal(JsonValueKind.Null, description.ValueKind);
}
[Fact]
public async Task ReadyEndpoint_Degraded_Returns200_WithDegradedStatus()
{
@@ -127,6 +127,31 @@ public sealed class TierMappingTests
Assert.Equal(0, active.Invocations);
}
[Fact]
public async Task ChainedConvention_AppliesToAllThreeEndpoints()
{
    // MapZbHealth returns a composite builder, so a convention chained onto its result
    // (.RequireHost) must gate all three endpoints — not just readiness. With a host filter
    // that does not match the default test-client host, every tier returns 404.
    var builder = WebApplication.CreateBuilder();
    builder.WebHost.UseTestServer();
    builder.Services.AddHealthChecks()
        .AddCheck("ready-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Ready })
        .AddCheck("active-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Active });

    await using var app = builder.Build();
    app.MapZbHealth().RequireHost("health.internal");
    await app.StartAsync();

    // The client and every response are disposed when the test exits so no HTTP resources
    // outlive it.
    using var client = app.GetTestClient();
    using var ready = await client.GetAsync("/health/ready");
    using var active = await client.GetAsync("/health/active");
    using var live = await client.GetAsync("/healthz");

    // The default test host does not match "health.internal", so the convention removed every
    // endpoint from this host — confirming it fanned out to all three, not just readiness.
    // One assertion per endpoint keeps the failing tier identifiable from the stack trace.
    Assert.Equal(HttpStatusCode.NotFound, ready.StatusCode);
    Assert.Equal(HttpStatusCode.NotFound, active.StatusCode);
    Assert.Equal(HttpStatusCode.NotFound, live.StatusCode);
}
[Fact]
public async Task Options_OverrideRoutePaths()
{