Fix all baseline code-review findings across the six shared libraries

Resolves the 35 findings from the 2026-06-01 baseline (commit 26ba1c7),
test-first for every behavioral change. +51 tests (331 -> 382 passing, 0 failed).

- Telemetry-001 (HIGH): RedactionEnricher now honours property removal, so a
  redactor that drops a key actually scrubs the secret from the event.
- Auth: LDAP options validator now runs at host start (ValidateOnStart); API-key
  verification no longer fails when the best-effort MarkUsed write throws, and a
  corrupt scopes column now yields an empty scope set (fail-closed) instead of
  throwing; LDAP cert validation hook; KeyPrefix persistence aligned; README
  algorithm corrected.
- Health: Akka checks return Degraded (not throw) when the cluster isn't up yet;
  GrpcDependencyHealthCheck catch-all; null 'description' rendered; composite
  endpoint builder; XML docs shipped.
- Audit: CompositeAuditWriter no longer re-throws OperationCanceledException;
  TruncatingAuditRedactor over-redact scrubs Target + safe negative max; options
  record; XML docs shipped.
- Configuration: TryAddEnumerable idempotent registration; consistent port
  quoting; strict invariant port parsing; XML docs + README packaged.
- Theme: mobile toggle is now CSS-only (no Bootstrap JS); token/CSS hygiene;
  XML docs on the public parameter surface.

Shared-contract/spec docs updated where the code was the source of truth
(observability service.instance.id, MapZbMetrics, redactor reach). All changes
additive/back-compatible at v0.1.0. code-reviews bookkeeping follows separately.
This commit is contained in:
Joseph Doherty
2026-06-01 11:22:14 -04:00
parent 26ba1c7215
commit 544a6ddb77
72 changed files with 1539 additions and 191 deletions
@@ -87,6 +87,33 @@ public sealed class ApiKeyAdminCommandsTests : IAsyncLifetime
Assert.Single(recent, e => e.EventType == "create-key");
}
[Fact]
public async Task CreateKey_PersistsBareTokenPrefix_NotPrefixUnderscoreKeyId()
{
    // Auth-005 regression guard: the stored KeyPrefix is only the token prefix ("mxgw") and
    // never "mxgw_key-1". The key id already has a dedicated column; folding it into the prefix
    // yielded a self-referential value that disagreed with the read/test paths and confused
    // admin tooling.
    const string keyId = "key-1";
    const string expectedPrefix = "mxgw";
    var scopes = new HashSet<string>(StringComparer.Ordinal) { "read" };

    ApiKeyAdminCommands commands = BuildCommands();
    await commands.InitDbAsync(null, CancellationToken.None);
    await commands.CreateKeyAsync(
        keyId,
        "Service A",
        scopes,
        constraintsJson: null,
        remoteAddress: null,
        CancellationToken.None);

    // Read path: the persisted record carries the bare prefix.
    ApiKeyRecord? stored = await _read.FindByKeyIdAsync(keyId, CancellationToken.None);
    Assert.NotNull(stored);
    Assert.Equal(expectedPrefix, stored!.KeyPrefix);

    // Admin list projection: the same bare prefix is reported for the key.
    IReadOnlyList<ApiKeyListItem> allKeys = await commands.ListKeysAsync(CancellationToken.None);
    ApiKeyListItem listedKey = Assert.Single(allKeys, entry => entry.KeyId == keyId);
    Assert.Equal(expectedPrefix, listedKey.KeyPrefix);
}
[Fact]
public async Task CreateKey_PepperUnavailable_ReturnsNoTokenAndAppendsNoAudit()
{
@@ -212,6 +212,51 @@ public class ApiKeyVerifierTests
Assert.DoesNotContain(Convert.ToBase64String(hash), identityText, StringComparison.Ordinal);
}
// --- Auth-002: a failed best-effort MarkUsedAsync must NOT fail a valid key ---
[Fact]
public async Task VerifyAsync_ValidKey_MarkUsedThrows_StillSucceeds()
{
    // The "last used" write is best-effort bookkeeping that happens after the auth decision.
    // A transient storage failure there (SQLITE_BUSY, disk full, locked DB) must not flip a
    // valid credential into a failed auth. The verifier's contract is that cancellation is the
    // only exception path, so any other MarkUsedAsync failure is swallowed and the result
    // still reports Succeeded == true.
    var usageWriteFailure = new InvalidOperationException("SQLITE_BUSY");
    var store = new FakeApiKeyStore
    {
        Record = BuildRecord(ApiKeySecretHasher.Hash(Secret, Pepper)),
        MarkUsedException = usageWriteFailure,
    };
    var verifier = BuildVerifier(store, new FakePepperProvider(Pepper));

    ApiKeyVerification outcome =
        await verifier.VerifyAsync(Header(KeyId, Secret), CancellationToken.None);

    Assert.True(outcome.Succeeded);
    Assert.Null(outcome.Failure);
    Assert.NotNull(outcome.Identity);
    Assert.Equal(KeyId, outcome.Identity!.KeyId);

    // The usage write was attempted (and failed) rather than skipped.
    Assert.True(store.MarkUsedCalled);
}
[Fact]
public async Task VerifyAsync_MarkUsedThrowsOperationCanceled_Propagates()
{
    // Cancellation is the one exception the verifier lets through: when the usage write
    // reports OperationCanceledException (e.g. the request was cancelled mid-write), it must
    // be re-thrown to the caller instead of being swallowed like other write failures.
    var store = new FakeApiKeyStore
    {
        Record = BuildRecord(ApiKeySecretHasher.Hash(Secret, Pepper)),
        MarkUsedException = new OperationCanceledException(),
    };
    var verifier = BuildVerifier(store, new FakePepperProvider(Pepper));

    Func<Task> verify = () => verifier.VerifyAsync(Header(KeyId, Secret), CancellationToken.None);

    await Assert.ThrowsAnyAsync<OperationCanceledException>(verify);
}
// --- Cancellation ---
[Fact]
@@ -253,6 +298,9 @@ public class ApiKeyVerifierTests
/// <summary>Key id captured by the fake's mark-used bookkeeping (assigned together with <c>MarkUsedCalled</c>).</summary>
public string? MarkUsedKeyId { get; private set; }
/// <summary>Timestamp captured by the fake's mark-used bookkeeping (assigned together with <see cref="MarkUsedKeyId"/>).</summary>
public DateTimeOffset? MarkUsedWhenUtc { get; private set; }
/// <summary>
/// When set, <see cref="MarkUsedAsync"/> records the call and then returns a task faulted with this
/// exception, so the failure surfaces when the caller awaits the usage write.
/// </summary>
public Exception? MarkUsedException { get; set; }
public Task<ApiKeyRecord?> FindByKeyIdAsync(string keyId, CancellationToken ct)
{
FindByKeyIdCalled = true;
@@ -267,6 +315,11 @@ public class ApiKeyVerifierTests
MarkUsedCalled = true;
MarkUsedKeyId = keyId;
MarkUsedWhenUtc = whenUtc;
if (MarkUsedException is not null)
{
return Task.FromException(MarkUsedException);
}
return Task.CompletedTask;
}
}
@@ -164,6 +164,36 @@ public sealed class SqliteApiKeyStoreTests : IAsyncLifetime
Assert.Empty(ScopeSerializer.Deserialize(""));
}
// --- Auth-003: corrupt scopes JSON must fail closed (empty set), never throw JsonException ---
// A poisoned scopes column (tampering, partial write, format change, buggy writer) has to
// collapse to a zero-scope set; throwing here would surface on the verification hot path.
[Theory]
[InlineData("not json at all")]
[InlineData("{")]
[InlineData("{\"a\":1}")] // well-formed JSON, but an object rather than a string[]
[InlineData("42")] // well-formed JSON, but a number
[InlineData("[\"read\",")] // array cut off mid-write
public void ScopeSerializer_DeserializeMalformed_ReturnsEmptySet_DoesNotThrow(string value) =>
    Assert.Empty(ScopeSerializer.Deserialize(value));
[Fact]
public async Task FindByKeyId_CorruptScopesColumn_ReturnsRecordWithEmptyScopes_DoesNotThrow()
{
    // Persist a row whose scopes column is malformed JSON and read it back through the store.
    // FindByKeyIdAsync must not leak a JsonException: the verifier depends on the store to keep
    // its "only exception path is cancellation" contract.
    const string keyId = "key-corrupt";
    await InsertWithRawScopesAsync(keyId, scopesJson: "{ this is not valid json");

    ApiKeyRecord? record = await _store.FindByKeyIdAsync(keyId, CancellationToken.None);

    // The row is still returned, just with no scopes granted.
    Assert.NotNull(record);
    Assert.Empty(record!.Scopes);
}
private static ApiKeyRecord SampleRecord(string keyId) => new(
KeyId: keyId,
KeyPrefix: "mxgw_ab12",
@@ -213,6 +243,33 @@ public sealed class SqliteApiKeyStoreTests : IAsyncLifetime
await command.ExecuteNonQueryAsync(CancellationToken.None);
}
private async Task InsertWithRawScopesAsync(string keyId, string scopesJson)
{
    // Inserts an api_keys row with the scopes column written exactly as given, bypassing
    // ScopeSerializer.Serialize, so tests can persist malformed values that simulate
    // tampering or a partial/buggy write.
    await using SqliteConnection connection =
        await _factory.OpenConnectionAsync(CancellationToken.None);
    await using SqliteCommand insert = connection.CreateCommand();
    insert.CommandText = """
        INSERT INTO api_keys (
        key_id, key_prefix, secret_hash, display_name, scopes,
        constraints, created_utc, last_used_utc, revoked_utc)
        VALUES (
        $key_id, $key_prefix, $secret_hash, $display_name, $scopes,
        $constraints, $created_utc, $last_used_utc, $revoked_utc);
        """;

    // Fixed filler values for every column except the key id and the raw scopes text.
    byte[] placeholderHash = { 1, 2, 3 };
    string createdUtc = DateTimeOffset.UnixEpoch.ToString("O");

    var parameters = insert.Parameters;
    parameters.AddWithValue("$key_id", keyId);
    parameters.AddWithValue("$key_prefix", "mxgw");
    parameters.Add("$secret_hash", SqliteType.Blob).Value = placeholderHash;
    parameters.AddWithValue("$display_name", "Corrupt Key");
    parameters.AddWithValue("$scopes", scopesJson);
    parameters.AddWithValue("$constraints", DBNull.Value);
    parameters.AddWithValue("$created_utc", createdUtc);
    parameters.AddWithValue("$last_used_utc", DBNull.Value);
    parameters.AddWithValue("$revoked_utc", DBNull.Value);

    await insert.ExecuteNonQueryAsync(CancellationToken.None);
}
public Task DisposeAsync()
{
SqliteConnection.ClearAllPools();
@@ -1,5 +1,6 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.AspNetCore;
@@ -85,4 +86,52 @@ public class ServiceCollectionExtensionsTests
Assert.Contains(validators, v => v is LdapOptionsValidator);
}
// --- Auth-001: ValidateOnStart must run options validation at host startup, not first login ---
private static IConfiguration BuildInsecureConfiguration()
{
    // Builds an in-memory LDAP configuration that requests plaintext transport while
    // AllowInsecure is false: the combination the options validator must reject.
    static string Key(string name) => $"{LdapSection}:{name}";

    var settings = new Dictionary<string, string?>
    {
        [Key("Server")] = LdapServer,
        [Key("SearchBase")] = "dc=example,dc=com",
        [Key("ServiceAccountDn")] = "cn=svc,dc=example,dc=com",
        [Key("Transport")] = nameof(LdapTransport.None),
        [Key("AllowInsecure")] = "false",
    };

    return new ConfigurationBuilder()
        .AddInMemoryCollection(settings)
        .Build();
}
[Fact]
public async Task AddZbLdapAuth_StartingHost_FailsForInsecureConfig()
{
    // A bad LDAP configuration has to be reported when the host starts, not postponed until
    // the first login (the first ILdapAuthService resolution). ValidateOnStart hooks options
    // validation into host start-up, so StartAsync is expected to throw
    // OptionsValidationException.
    IConfiguration insecure = BuildInsecureConfiguration();
    IHostBuilder builder = new HostBuilder()
        .ConfigureServices(services => services.AddZbLdapAuth(insecure, LdapSection));
    using IHost host = builder.Build();

    OptionsValidationException failure =
        await Assert.ThrowsAsync<OptionsValidationException>(() => host.StartAsync());

    // The reported failures must name the offending Transport option.
    string failureText = string.Join(" ", failure.Failures);
    Assert.Contains(nameof(LdapOptions.Transport), failureText);
}
[Fact]
public async Task AddZbLdapAuth_StartingHost_SucceedsForSecureConfig()
{
    // Counterpart to the insecure case: with a valid (secure) configuration the host starts
    // and stops without error, showing ValidateOnStart does not reject well-formed options.
    IConfiguration secure = BuildConfiguration();
    IHostBuilder builder = new HostBuilder()
        .ConfigureServices(services => services.AddZbLdapAuth(secure, LdapSection));
    using IHost host = builder.Build();

    await host.StartAsync();
    await host.StopAsync();
}
}
@@ -1,3 +1,4 @@
using System.Net.Security;
using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.Ldap.Internal;
@@ -19,6 +20,10 @@ internal sealed class FakeLdapConnection : ILdapConnection
// ---- observation -----
/// <summary>
/// The positional arguments passed to the most recent <see cref="Connect"/> call. They are captured
/// before the fake decides whether to throw, so they are available even for a failed connect.
/// </summary>
public (string Host, int Port, LdapTransport Transport, bool AllowInsecure, int TimeoutMs)? ConnectArgs { get; private set; }
/// <summary>The server-certificate validation callback passed to the most recent <see cref="Connect"/> call.</summary>
public RemoteCertificateValidationCallback? ConnectCertCallback { get; private set; }
// NOTE(review): presumably the DNs this fake was asked to bind as, in call order — confirm against the bind implementation.
public List<string> BoundDns { get; } = new();
/// <summary>
@@ -107,9 +112,16 @@ internal sealed class FakeLdapConnection : ILdapConnection
// ---- ILdapConnection -----
public void Connect(string host, int port, LdapTransport transport, bool allowInsecure, int timeoutMs)
public void Connect(
string host,
int port,
LdapTransport transport,
bool allowInsecure,
int timeoutMs,
RemoteCertificateValidationCallback? serverCertificateValidationCallback = null)
{
ConnectArgs = (host, port, transport, allowInsecure, timeoutMs);
ConnectCertCallback = serverCertificateValidationCallback;
if (_throwOnConnect)
throw new Novell.Directory.Ldap.LdapException(
"Directory unreachable", Novell.Directory.Ldap.LdapException.ConnectError, host);
@@ -1,3 +1,4 @@
using System.Net.Security;
using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.Ldap;
@@ -80,6 +81,56 @@ public class LdapAuthServiceTests
Assert.Equal(LdapAuthFailure.Disabled, (await svc.AuthenticateAsync("a", "b", default)).Failure);
}
// --- Auth-006: TLS validation seam — allowInsecure is honoured and a cert-validation
// callback is threaded into the connection rather than being silently ignored. ---
[Fact]
public async Task Connect_ReceivesAllowInsecureFlag_FromOptions()
{
    // AllowInsecure from the options has to arrive at the connection's Connect call; it was
    // previously an unused parameter.
    string[] groups = { "cn=Engineers,ou=g,dc=x" };
    var fake = new FakeLdapConnection().WithUserEntry("cn=alice,dc=x", memberOf: groups);
    var options = Opts() with { AllowInsecure = true };
    var svc = new LdapAuthService(options, new FakeLdapConnectionFactory(fake));

    await svc.AuthenticateAsync("alice", "pw", default);

    var connectArgs = fake.ConnectArgs;
    Assert.NotNull(connectArgs);
    Assert.True(connectArgs!.Value.AllowInsecure);
}
[Fact]
public async Task Connect_ReceivesConfiguredCertValidationCallback()
{
    // When the consumer configures a RemoteCertificateValidationCallback, that exact delegate
    // must be handed to the connection so production callers can pin a CA / validate the SAN.
    // The seam must not drop it.
    RemoteCertificateValidationCallback suppliedCallback = (_, _, _, _) => true;
    string[] groups = { "cn=Engineers,ou=g,dc=x" };
    var fake = new FakeLdapConnection().WithUserEntry("cn=alice,dc=x", memberOf: groups);
    var options = Opts() with { ServerCertificateValidationCallback = suppliedCallback };
    var svc = new LdapAuthService(options, new FakeLdapConnectionFactory(fake));

    await svc.AuthenticateAsync("alice", "pw", default);

    // Reference equality: the same delegate instance, not a wrapper.
    Assert.Same(suppliedCallback, fake.ConnectCertCallback);
}
[Fact]
public async Task Connect_NoCertCallbackConfigured_PassesNull()
{
    // With no callback configured the connection receives null, which the production adapter
    // treats as "validate against the OS trust store" (documented behaviour).
    string[] groups = { "cn=Engineers,ou=g,dc=x" };
    var fake = new FakeLdapConnection().WithUserEntry("cn=alice,dc=x", memberOf: groups);
    var factory = new FakeLdapConnectionFactory(fake);
    var svc = new LdapAuthService(Opts(), factory);

    await svc.AuthenticateAsync("alice", "pw", default);

    Assert.Null(fake.ConnectCertCallback);
}
[Fact]
public async Task PreservesEscapedCommaInGroupName_OnRfc4514Dn()
{