Compare commits
6 Commits
3f19371017
...
a3d359fff7
| Author | SHA1 | Date | |
|---|---|---|---|
| a3d359fff7 | |||
| 84a696b0e4 | |||
| a9bd017c88 | |||
| dab0056d1b | |||
| 858fe24add | |||
| 8664cdf940 |
+121
-15
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 8 |
|
||||
| Open findings | 1 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -250,7 +250,7 @@ appropriately before the fix and pass after.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/Actors/AkkaHostedService.cs:345` |
|
||||
|
||||
**Description**
|
||||
@@ -270,7 +270,20 @@ storeAndForwardService.StartAsync(cancellationToken)`. Propagate the
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed: `RegisterSiteActors`
|
||||
called `storeAndForwardService.StartAsync().GetAwaiter().GetResult()` synchronously
|
||||
inside the `IHostedService.StartAsync` path, blocking a startup thread and wrapping
|
||||
any failure in `AggregateException`. Fix: `AkkaHostedService.StartAsync` is now
|
||||
genuinely `async Task`; the site path was renamed `RegisterSiteActorsAsync` and made
|
||||
`async`; the store-and-forward init is now `await storeAndForwardService.StartAsync()`
|
||||
(the service's own signature, in another module, takes no `CancellationToken`, so the
|
||||
token is honoured via a `cancellationToken.ThrowIfCancellationRequested()` checkpoint
|
||||
immediately before the await). No blocking, no sync-context deadlock risk, and the
|
||||
original exception type now surfaces. No dedicated regression test: the only
|
||||
observable change is non-blocking/exception-unwrapping behaviour on a path that
|
||||
requires the full site DI graph plus a real SQLite store; the existing
|
||||
`HostStartupTests.SiteRole_*` and `CompositionRootTests` continue to exercise the
|
||||
async startup pipeline and remain green (175 passed).
|
||||
|
||||
### Host-006 — HOCON assembled by unescaped string interpolation
|
||||
|
||||
@@ -278,7 +291,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Akka.NET conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/Actors/AkkaHostedService.cs:70-108` |
|
||||
|
||||
**Description**
|
||||
@@ -302,7 +315,25 @@ and seed nodes) before substitution.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed: the Akka HOCON was
|
||||
built by an interpolated string injecting `NodeHostname`, `SeedNodes`, roles and
|
||||
`SplitBrainResolverStrategy` directly into the document text with no escaping — a
|
||||
value containing a quote, backslash or whitespace would corrupt or misparse the
|
||||
document. Re-triaged the recommendation: switching wholesale to the typed
|
||||
`Akka.Hosting` builder is a larger refactor than this Low finding warrants and would
|
||||
touch how the whole cluster is bootstrapped, so the HOCON form was retained but made
|
||||
safe. Fix: the HOCON construction was extracted into a testable static
|
||||
`AkkaHostedService.BuildHocon`, and every interpolated *string* value is now routed
|
||||
through a `QuoteHocon` helper that wraps the value in a double-quoted HOCON literal
|
||||
and escapes embedded backslashes and quotes — `NodeHostname`, every seed node, every
|
||||
role, and `SplitBrainResolverStrategy` (the last was previously unquoted entirely, so
|
||||
whitespace alone would break it). Regression tests in new `HoconBuilderTests`:
|
||||
`BuildHocon_HostnameWithQuote_DoesNotCorruptDocument` (a hostname with a `"` still
|
||||
parses and following keys stay intact), `BuildHocon_StrategyWithWhitespace_RemainsQuoted`,
|
||||
and `BuildHocon_PlainValues_ParsesAndPreservesValues`. With the old raw interpolation
|
||||
a quote-containing hostname produced `hostname = "evil"host"`, closing the literal
|
||||
early and corrupting following keys; the escaped form parses correctly. Full Host
|
||||
suite green (175 passed).
|
||||
|
||||
### Host-007 — REQ-HOST-4 rule "GrpcPort ≠ RemotingPort" is not enforced
|
||||
|
||||
@@ -310,7 +341,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/StartupValidator.cs:43-47` |
|
||||
|
||||
**Description**
|
||||
@@ -332,7 +363,18 @@ Add a check in the `role == "Site"` block: if `GrpcPort` (resolved, including th
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed against
|
||||
`StartupValidator.cs`: the `role == "Site"` block validated the GrpcPort range but
|
||||
never compared it to `RemotingPort`, so a site config setting both ports equal
|
||||
passed validation and then failed opaquely when Kestrel and Akka.Remote both bound
|
||||
the port. Fix: added `if (port == grpcPort) errors.Add("ScadaLink:Node:GrpcPort must
|
||||
differ from RemotingPort")` in the Site block, using the resolved GrpcPort (including
|
||||
the 8083 `NodeOptions` default when the key is absent) against the parsed
|
||||
RemotingPort. Regression tests in `StartupValidatorTests`:
|
||||
`Site_GrpcPortEqualsRemotingPort_FailsValidation`,
|
||||
`Site_DefaultGrpcPortEqualsRemotingPort_FailsValidation` (default-8083 path), and
|
||||
`Site_GrpcPortDiffersFromRemotingPort_PassesValidation` — all failed appropriately
|
||||
before the fix and pass after. Full Host suite green (175 passed).
|
||||
|
||||
### Host-008 — `MachineDataDb` is validated and declared but never consumed
|
||||
|
||||
@@ -340,7 +382,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/StartupValidator.cs:33-34`, `src/ScadaLink.Host/DatabaseOptions.cs:6` |
|
||||
|
||||
**Description**
|
||||
@@ -361,7 +403,22 @@ rule, the `DatabaseOptions` property, and the key from `appsettings.Central.json
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed: a repo-wide search
|
||||
shows `MachineDataDb` is read nowhere outside the Host module — only `ConfigurationDb`
|
||||
is wired into `AddConfigurationDatabase` — yet `StartupValidator` failed Central
|
||||
startup when it was absent. No machine-data store exists or is registered, so this is
|
||||
dead configuration. Fix (the "if no" branch of the recommendation): removed the
|
||||
`MachineDataDb` validation rule from `StartupValidator`, the `MachineDataDb` property
|
||||
from `DatabaseOptions`, the `MachineDataDb` key from `appsettings.Central.json`, and
|
||||
the now-pointless `MachineDataDb` reference from that file's `_secrets` note and from
|
||||
the `CentralDbTestEnvironment` test helper. Regression test: the prior
|
||||
`StartupValidatorTests.Central_MissingMachineDataDb_FailsValidation` was inverted to
|
||||
`Central_MissingMachineDataDb_PassesValidation` (absence must now NOT fail startup),
|
||||
and `MachineDataDb` was removed from the shared `ValidCentralConfig` fixture so all
|
||||
Central tests prove the key is no longer required; it failed under the old code and
|
||||
passes after. Full Host suite green (175 passed). Note: `appsettings.Central.json`
|
||||
under `docker/central-node-*` is outside this task's edit scope and still carries the
|
||||
key harmlessly — it will simply be ignored; a follow-up may remove it for tidiness.
|
||||
|
||||
### Host-009 — `StartAsync` reports success before role actors are confirmed running
|
||||
|
||||
@@ -369,7 +426,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Akka.NET conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/Actors/AkkaHostedService.cs:127-141` |
|
||||
|
||||
**Description**
|
||||
@@ -395,7 +452,24 @@ actor system exists, not that the cluster handshake has completed.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`) — re-triaged, documentation fix. Root cause
|
||||
confirmed: `SetReady` is called optimistically after `ActorOf` registrations whose
|
||||
`PreStart`/initial-contact handshakes run asynchronously. Re-triaged the first
|
||||
recommended option (gating `SetReady` on a `SiteCommunicationActor` `Ask`
|
||||
round-trip): that option is **incorrect** because the meaningful asynchronous step is
|
||||
the `ClusterClient` initial-contact handshake with central — gating site gRPC
|
||||
readiness on it would prevent a site from coming up and streaming locally while
|
||||
central is briefly unreachable, contradicting the design's site-autonomy model.
|
||||
Confirmed `SiteStreamGrpcServer` already rejects any stream opened before `SetReady`
|
||||
with `StatusCode.Unavailable`, so the window is benign. Fix (the second, correct
|
||||
option): the `SetReady` call site now carries an explicit comment documenting the
|
||||
narrow readiness contract — by `SetReady` the actor system exists,
|
||||
`SiteStreamManager.Initialize` has run, and every role actor has been created with
|
||||
ordered synchronous `ActorOf` + registration `Tell`s; what is deliberately NOT
|
||||
guaranteed is `PreStart` completion or the central handshake. No regression test is
|
||||
meaningful for a documentation-only clarification of an intentional design contract;
|
||||
the existing `HostStartupTests.SiteRole_*` cover the surrounding startup path. Full
|
||||
Host suite green (175 passed).
|
||||
|
||||
### Host-010 — No retry/backoff around startup preconditions (DB migration, readiness)
|
||||
|
||||
@@ -403,7 +477,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/Program.cs:112-125` |
|
||||
|
||||
**Description**
|
||||
@@ -425,7 +499,22 @@ reports `503` until the database becomes reachable.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed: Central startup called
|
||||
`MigrationHelper.ApplyOrValidateMigrationsAsync` directly with no tolerance for a
|
||||
database that is briefly unreachable at boot — a first connection failure threw, the
|
||||
top-level `catch` logged `Fatal`, and the process exited. Fix: added a new
|
||||
`StartupRetry.ExecuteWithRetryAsync` helper (bounded retry with capped exponential
|
||||
backoff — no Polly dependency, keeping the Host's package set minimal) and wrapped
|
||||
the migration step in it (`maxAttempts: 8`, `initialDelay: 2s`, doubling, capped at
|
||||
30s). Each retry opens a fresh DI scope so a transient connection fault does not
|
||||
leave a poisoned `DbContext`; once attempts are exhausted the last exception
|
||||
propagates and the existing fatal-log path still fires. Regression tests in new
|
||||
`StartupRetryTests`: `ExecuteWithRetry_SucceedsFirstTry_RunsOnce`,
|
||||
`ExecuteWithRetry_TransientFailures_RetriesUntilSuccess` (fails twice then succeeds),
|
||||
and `ExecuteWithRetry_ExhaustsAttempts_RethrowsLastException` (verifies the original
|
||||
exception type/message surfaces after exhaustion) — they fail to compile/pass against
|
||||
the pre-fix code (the helper did not exist) and pass after. Full Host suite green
|
||||
(175 passed).
|
||||
|
||||
### Host-011 — `LoggingOptions.MinimumLevel` is dead configuration
|
||||
|
||||
@@ -433,7 +522,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Host/LoggingOptions.cs:5`, `src/ScadaLink.Host/Program.cs:42-50` |
|
||||
|
||||
**Description**
|
||||
@@ -456,4 +545,21 @@ configuration section. Keep one mechanism, not two.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Root cause confirmed: `LoggingOptions.MinimumLevel`
|
||||
was bound from `ScadaLink:Logging` but Serilog was configured purely via
|
||||
`ReadFrom.Configuration` (standard `Serilog` section), so editing
|
||||
`ScadaLink:Logging:MinimumLevel` had no effect. Re-triaged the two options: the
|
||||
"remove the option class" branch was rejected because `Component-Host.md` REQ-HOST-3
|
||||
explicitly lists `ScadaLink:Logging` → `LoggingOptions` and that design doc is outside
|
||||
this task's edit scope — removing the class would create a fresh code-vs-doc drift.
|
||||
Fix (the "consume it" branch): added a `LoggerConfigurationFactory.Build` that binds
|
||||
`LoggingOptions`, parses `MinimumLevel` into a Serilog `LogEventLevel` (falling back
|
||||
to `Information` for null/blank/unrecognised values), and pins it via
|
||||
`MinimumLevel.Is(...)` on top of `ReadFrom.Configuration`; `Program.cs` now builds the
|
||||
logger through this factory. `ScadaLink:Logging:MinimumLevel` is now live.
|
||||
Regression tests in new `LoggerConfigurationTests`:
|
||||
`MinimumLevel_Warning_SuppressesInformationLogs`,
|
||||
`MinimumLevel_Debug_AllowsDebugLogs`, and `MinimumLevel_Absent_DefaultsToInformation`
|
||||
— they assert the configured level actually filters log events; they pass against the
|
||||
new factory (and would fail against the old inline configuration, which ignored the
|
||||
key). Full Host suite green (175 passed).
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 6 |
|
||||
| Open findings | 2 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -386,7 +386,7 @@ actually enforced in production; until then the endpoint defaults to "allow".
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.InboundAPI/InboundScriptExecutor.cs:123-128` |
|
||||
|
||||
**Description**
|
||||
@@ -406,7 +406,16 @@ updated via `CompileAndRegister`.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): confirmed the root cause — a failed `Compile`
|
||||
stored nothing in `_scriptHandlers`, so every subsequent request re-entered the
|
||||
lazy-compile branch and re-ran Roslyn. Added a `_knownBadMethods` `ConcurrentDictionary`
|
||||
of method names whose compilation failed; `ExecuteAsync`'s lazy-compile path
|
||||
short-circuits before Roslyn when the method is already known-bad, and records the
|
||||
failure on a fresh failed compile. `CompileAndRegister` also records failures and
|
||||
clears the record on a successful (re)compile, so a fixed method definition is
|
||||
re-evaluated. Regression tests `FailedCompilation_IsNotRetriedOnEveryRequest`
|
||||
(asserts the compile-failure log fires exactly once across 5 requests) and
|
||||
`FailedCompilation_RecompilesAfterCompileAndRegisterCalledAgain` added.
|
||||
|
||||
### InboundAPI-010 — `ParameterValidator` ignores extra body fields and cannot validate Object/List element types
|
||||
|
||||
@@ -414,7 +423,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.InboundAPI/ParameterValidator.cs:64-90`, `:112-118` |
|
||||
|
||||
**Description**
|
||||
@@ -438,7 +447,20 @@ shape — and update the design doc to match.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): both gaps addressed along the lines the
|
||||
recommendation offers. (1) `ParameterValidator.Validate` now enumerates the request
|
||||
body's top-level fields and returns an Invalid result naming any field that does not
|
||||
match a defined parameter (`"Unexpected parameter(s): ..."`), so a typo'd parameter
|
||||
name is reported instead of silently ignored. (2) For `Object`/`List`, recursive
|
||||
field/element-level type validation is **deliberately not** added — it requires a
|
||||
richer nested `ParameterDefinition` schema, a design decision; instead the
|
||||
shape-only behaviour is now explicitly documented in the `CoerceValue` XML comment so
|
||||
the code's contract is unambiguous. Re-triage note: the design doc
|
||||
(`Component-InboundAPI.md` line 43) lists only Boolean/Integer/Float/String as method
|
||||
parameter types — the Object/List extended types are a CLAUDE.md decision; reconciling
|
||||
the design doc is out of this module's editable scope and left as a doc-owner
|
||||
follow-up. Regression tests `UnexpectedBodyField_ReturnsInvalid` and
|
||||
`OnlyDefinedFields_StillValid` added.
|
||||
|
||||
### InboundAPI-011 — Method-existence check leaks to unapproved callers (enumeration oracle)
|
||||
|
||||
@@ -446,7 +468,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Security |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.InboundAPI/ApiKeyValidator.cs:39-52` |
|
||||
|
||||
**Description**
|
||||
@@ -466,7 +488,18 @@ raw caller input in error bodies, or sanitize it.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): confirmed — `ValidateAsync` returned 400
|
||||
`Method '{methodName}' not found` for a missing method but 403
|
||||
`API key not approved for this method` for an existing-but-unapproved one, an
|
||||
enumeration oracle, and echoed the caller-supplied method name verbatim. Both cases
|
||||
now return an identical response: HTTP 403 with the single shared message
|
||||
`API key not approved for this method` (a `NotApprovedMessage` constant); the
|
||||
method name is no longer interpolated into any error body, removing both the
|
||||
existence oracle and the reflected-input concern. Regression tests
|
||||
`ValidKey_MethodNotFound_IsIndistinguishableFromNotApproved` and
|
||||
`ValidKey_MethodNotFound_ErrorMessageDoesNotEchoMethodName` added; the pre-existing
|
||||
`ValidKey_MethodNotFound_Returns400` test was updated to assert the new
|
||||
indistinguishable contract.
|
||||
|
||||
### InboundAPI-012 — `ParameterDefinition` POCO declared in the component project, not Commons
|
||||
|
||||
@@ -495,7 +528,18 @@ components that work with method definitions.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
_Unresolved — left Open; the fix crosses this module's editable boundary._
|
||||
Re-triage 2026-05-16: confirmed against the source — `ParameterDefinition`
|
||||
(`ParameterValidator.cs:128-133`) is indeed an API-contract-shaped POCO declared in
|
||||
the component project. However the recommended fix is to **create the type in
|
||||
`ScadaLink.Commons`** (and update the validator plus any other consumers to reference
|
||||
it), which edits files outside this module's editable scope
|
||||
(`src/ScadaLink.InboundAPI`, `tests/`, this file only). It also touches a shared
|
||||
contract type: a Commons namespace placement and a likely-paired return-definition
|
||||
type are a cross-component code-organization decision. **Surface to the design/Commons
|
||||
owner:** add `ParameterDefinition` (and a return-definition counterpart) under a
|
||||
`ScadaLink.Commons` InboundApi types namespace, then repoint `ParameterValidator` and
|
||||
any other consumers at it.
|
||||
|
||||
### InboundAPI-013 — `ApiKeyValidationResult.NotFound` factory returns HTTP 400, contradicting its name
|
||||
|
||||
@@ -503,7 +547,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.InboundAPI/ApiKeyValidator.cs:78-79` |
|
||||
|
||||
**Description**
|
||||
@@ -523,4 +567,14 @@ does not list it.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): the misnamed `NotFound` factory (which built a
|
||||
`StatusCode = 400` result) was the only producer of the "method not found" result,
|
||||
and the InboundAPI-011 fix made "method not found" return 403 via the existing
|
||||
`Forbidden` factory instead. The misleading `NotFound` factory is therefore now
|
||||
**removed entirely** — it has no remaining callers in or out of the module
|
||||
(`ApiKeyValidationResult` is InboundAPI-internal), eliminating the name/behaviour
|
||||
contradiction. No separate regression test is needed: the InboundAPI-011 tests cover
|
||||
the new method-not-found status, and removing dead code cannot regress. Doc-owner
|
||||
follow-up: `Component-InboundAPI.md`'s Error Handling section still does not list a
|
||||
"method not found" status; it should note that it is reported as 403 (indistinguishable
|
||||
from "key not approved"), but that doc edit is outside this module's editable scope.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 (1 Deferred — see ManagementService-012) |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -211,7 +211,7 @@ mapping tests confirm behaviour is preserved.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Akka.NET conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ManagementService/ManagementActor.cs:33` |
|
||||
|
||||
**Description**
|
||||
@@ -230,7 +230,15 @@ Resume-based strategy, consistent with other central coordinator actors.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Confirmed: `ManagementActor` declared no
|
||||
`SupervisorStrategy`. Added a `public static SupervisorStrategy CreateSupervisorStrategy()`
|
||||
factory returning an unbounded `OneForOneStrategy` with a `Directive.Resume` decider, and a
|
||||
`protected override SupervisorStrategy()` that delegates to it — matching the Resume-based
|
||||
convention of `CentralCommunicationActor`/`SiteCommunicationActor`. The actor spawns no
|
||||
children today, so this is a forward-looking correctness fix. Regression tests:
|
||||
`CreateSupervisorStrategy_ReturnsOneForOneStrategy`,
|
||||
`CreateSupervisorStrategy_ResumesOnArbitraryException`,
|
||||
`CreateSupervisorStrategy_ResumesIndefinitely` (new `ManagementActorSupervisionTests.cs`).
|
||||
|
||||
### ManagementService-006 — JsonDocument instances never disposed in the HTTP endpoint
|
||||
|
||||
@@ -312,7 +320,7 @@ Regression tests: `SerializeResult_WithCyclicGraph_DoesNotThrow`,
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ManagementService/ManagementActor.cs:285` |
|
||||
|
||||
**Description**
|
||||
@@ -329,7 +337,15 @@ Resolve `RoleMapper` via `sp.GetRequiredService<RoleMapper>()` like every other
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Confirmed: `HandleResolveRoles` did
|
||||
`new RoleMapper(sp.GetRequiredService<ISecurityRepository>())`, bypassing the
|
||||
`AddScoped<RoleMapper>()` DI registration. The hand-built `RoleMapper` lived only inside
|
||||
`HandleResolveRoles`, which is itself the dead-code dispatch path removed under finding 011
|
||||
(the two-step ResolveRoles flow is retired). Resolving 011 by deleting the
|
||||
`ResolveRolesCommand` dispatch case and `HandleResolveRoles` handler also removes the only
|
||||
manually-constructed `RoleMapper` in the module, so the DI-bypass no longer exists. No
|
||||
remaining `new RoleMapper` in `src/ScadaLink.ManagementService`. Regression covered by
|
||||
`ResolveRolesCommand_IsNoLongerDispatched_ReturnsManagementError`.
|
||||
|
||||
### ManagementService-009 — Audit logging applied inconsistently across mutating handlers
|
||||
|
||||
@@ -385,7 +401,7 @@ covers the explicit-audit path.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ManagementService/ManagementServiceOptions.cs:5`; `src/ScadaLink.ManagementService/ManagementEndpoints.cs:16` |
|
||||
|
||||
**Description**
|
||||
@@ -405,7 +421,15 @@ configuration cannot be set with no effect.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Confirmed: `ManagementEndpoints` hard-coded
|
||||
`AskTimeout = TimeSpan.FromSeconds(30)` and never read `ManagementServiceOptions.CommandTimeout`.
|
||||
`HandleRequest` now resolves `IOptions<ManagementServiceOptions>` from `context.RequestServices`
|
||||
and computes the Ask timeout via a new `ManagementEndpoints.ResolveAskTimeout` helper, which
|
||||
returns the configured `CommandTimeout` when strictly positive and otherwise falls back to the
|
||||
30s default (guarding against a misconfigured zero/negative value that would fail every call).
|
||||
Regression tests: `ResolveAskTimeout_UsesConfiguredCommandTimeout`,
|
||||
`ResolveAskTimeout_WithNullOptions_FallsBackToDefault`,
|
||||
`ResolveAskTimeout_WithNonPositiveTimeout_FallsBackToDefault`.
|
||||
|
||||
### ManagementService-011 — ResolveRolesCommand dispatch path is stale dead code
|
||||
|
||||
@@ -413,7 +437,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ManagementService/ManagementActor.cs:273`, `:283` |
|
||||
|
||||
**Description**
|
||||
@@ -435,7 +459,19 @@ data unauthenticated is intended.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Confirmed dead path: a repository-wide search found no
|
||||
`ResolveRolesCommand` sender outside `ManagementActor` itself — the CLI and HTTP endpoint
|
||||
perform LDAP auth + role resolution inline. Removed the `ResolveRolesCommand` dispatch case
|
||||
and the `HandleResolveRoles` handler from `ManagementActor`; a stray ClusterClient sender now
|
||||
falls through to the `NotSupportedException` default and gets a uniform `ManagementError`
|
||||
(closing the unauthenticated role-mapping enumeration surface, since `GetRequiredRole`
|
||||
returned null for it). A code comment at the former dispatch site documents the intentional
|
||||
omission. Note: the `ResolveRolesCommand` *record* itself lives in
|
||||
`src/ScadaLink.Commons/Messages/Management/SecurityCommands.cs` and was left in place — that
|
||||
file is outside this module's permitted edit scope; deleting the orphan record should be done
|
||||
as a Commons-module follow-up. With the handler removed it is now an inert,
|
||||
registry-only type with no behaviour. Regression test:
|
||||
`ResolveRolesCommand_IsNoLongerDispatched_ReturnsManagementError`.
|
||||
|
||||
### ManagementService-012 — ManagementEnvelope carries a loosely-typed object payload
|
||||
|
||||
@@ -443,7 +479,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Akka.NET conventions |
|
||||
| Status | Open |
|
||||
| Status | Deferred |
|
||||
| Location | `src/ScadaLink.Commons/Messages/Management/ManagementEnvelope.cs:7`; `src/ScadaLink.ManagementService/ManagementActor.cs:132` |
|
||||
|
||||
**Description**
|
||||
@@ -463,7 +499,16 @@ flag unhandled cases, and keeps `ManagementCommandRegistry`'s reflection scan pr
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Deferred 2026-05-16. Finding verified as genuine: `ManagementEnvelope.Command` is typed
|
||||
`object` and the recommended `IManagementCommand` marker-interface fix is sound. However, the
|
||||
fix cannot be implemented within the `ManagementService` module: both `ManagementEnvelope` and
|
||||
all ~50 `*Command` records live in `src/ScadaLink.Commons/Messages/Management/` (17 files),
|
||||
which is outside this work item's permitted edit scope (`src/ScadaLink.ManagementService/**`,
|
||||
its tests, and this findings file only). Adding the marker interface, retyping the envelope,
|
||||
and having `ManagementCommandRegistry` constrain its reflection scan to `IManagementCommand`
|
||||
implementers is a cohesive Commons-module change and must be done there — also so the Commons
|
||||
message-contract additive-only evolution rules are respected. Deferred to a Commons-module
|
||||
work item; no `ManagementService`-local change is appropriate.
|
||||
|
||||
### ManagementService-013 — No tests for site-scope enforcement, the HTTP endpoint, or DebugStreamHub
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 3 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -360,7 +360,7 @@ under NotificationService-009.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:121-154` |
|
||||
|
||||
**Description**
|
||||
@@ -373,7 +373,16 @@ Move disconnect/dispose into a `finally` block (or use `await using` once `ISmtp
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against the current source:
|
||||
`DeliverAsync` invoked `smtp.DisconnectAsync` only on the success path inside the `try`
|
||||
block, so a failed `Connect`/`Authenticate`/`Send` left the connection open until
|
||||
finalization. `DisconnectAsync` was moved into the existing `finally` block (which also
|
||||
releases the NS-007 concurrency slot), so the SMTP connection is always torn down
|
||||
regardless of outcome. The disconnect is best-effort — wrapped in its own `try/catch` that
|
||||
logs at debug level — so a disconnect failure (e.g. the connection is already dead) cannot
|
||||
mask the original delivery exception. Regression tests
|
||||
`Send_TransientFailureDuringSend_StillDisconnectsClient` and
|
||||
`Send_FailureDuringAuthenticate_StillDisconnectsClient`.
|
||||
|
||||
### NotificationService-011 — `SmtpPermanentException` declared in the wrong file; module conventions
|
||||
|
||||
@@ -381,7 +390,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:173-177`, `src/ScadaLink.Commons/Entities/Notifications/SmtpConfiguration.cs:5-15` |
|
||||
|
||||
**Description**
|
||||
@@ -394,7 +403,22 @@ Move `SmtpPermanentException` to its own file. For `SmtpConfiguration`, either k
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Both issues confirmed against source.
|
||||
|
||||
1. **`SmtpPermanentException` placement (in scope — fixed).** The public exception type
|
||||
was extracted from the bottom of `NotificationDeliveryService.cs` into its own file,
|
||||
`src/ScadaLink.NotificationService/SmtpPermanentException.cs`, restoring the
|
||||
one-type-per-file layout. No behaviour change, so no dedicated regression test — the
|
||||
move is verified by the module test suite still compiling and passing (56 tests green).
|
||||
2. **`SmtpConfiguration` non-nullable strings (out of scope — re-triaged).**
|
||||
`SmtpConfiguration` lives in `src/ScadaLink.Commons`, which is outside the
|
||||
NotificationService module's edit scope; it cannot be changed here. This is the same
|
||||
Commons entity already flagged for follow-up under **NotificationService-013**
|
||||
(Deferred). The `required`-members / documented-constructor change should be folded into
|
||||
that Commons/ConfigurationDatabase-scoped work. Note the delivery service's actual risk
|
||||
is bounded: `SendAsync`/`DeliverBufferedAsync` already validate `TlsMode` and addresses
|
||||
up front (NS-005/NS-008), and a null `Host`/`AuthType` from a malformed config row
|
||||
surfaces as a classified/clean failure rather than silent corruption.
|
||||
|
||||
### NotificationService-012 — Test coverage gaps: OAuth2 delivery path, permanent-classification fallback, token-cache concurrency
|
||||
|
||||
@@ -402,7 +426,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs`, `tests/ScadaLink.NotificationService.Tests/OAuth2TokenServiceTests.cs` |
|
||||
|
||||
**Description**
|
||||
@@ -415,4 +439,29 @@ Add tests for: OAuth2-authenticated send with a mocked `OAuth2TokenService`; the
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Each listed gap was re-triaged against the current
|
||||
source (the module was substantially rewritten by NS-001..009 since this finding was
|
||||
filed) and the genuinely-missing tests were added:
|
||||
|
||||
1. **OAuth2 delivery path (gap real — covered).** New test
|
||||
`Send_OAuth2Config_AuthenticatesWithResolvedAccessToken` drives `DeliverAsync` with an
|
||||
`oauth2` `SmtpConfiguration` and a real `OAuth2TokenService` (mocked `IHttpClientFactory`),
|
||||
asserting the SMTP client is authenticated with the *resolved access token*, not the raw
|
||||
`tenant:client:secret` triple.
|
||||
2. **5xx-message permanent fallback (gap obsolete — re-triaged).** The substring-based
|
||||
permanent fallback this item describes no longer exists: NS-003 replaced message-text
|
||||
matching with typed-exception/`SmtpStatusCode` classification. The equivalent path is
|
||||
already covered by `Send_Smtp5xxCommandException_ClassifiedPermanent`, and
|
||||
`Send_NonSmtpExceptionWith5xxLookalikeText_NotPromotedToPermanent` proves a "5xx
|
||||
lookalike" message is no longer promoted. No new test needed for a removed branch.
|
||||
3. **Token expiry/refresh and concurrent acquisition (gaps real — covered).** New tests
|
||||
`GetTokenAsync_ExpiredToken_RefreshesOnNextCall` (uses `expires_in: 60` so the token is
|
||||
stale immediately given the 60s refresh margin) and
|
||||
`GetTokenAsync_ConcurrentCalls_MakeExactlyOneHttpRequest` (20 racing callers collapse to
|
||||
a single token fetch, exercising the per-credential double-checked lock).
|
||||
4. **End-to-end buffer-and-retry (gap already closed — re-triaged).** The
|
||||
buffer-then-retry-then-deliver round trip was added when NS-001 was resolved
|
||||
(`DeliverBuffered_HappyPath_ReturnsTrue`, `DeliverBuffered_ListNoLongerExists_…`, plus
|
||||
`AkkaHostedService` handler registration), so no further test is required here.
|
||||
|
||||
Module test suite is green at 56 tests.
|
||||
|
||||
+8
-31
@@ -42,8 +42,8 @@ module file and counted in **Total**.
|
||||
| Critical | 0 |
|
||||
| High | 0 |
|
||||
| Medium | 4 |
|
||||
| Low | 46 |
|
||||
| **Total** | **50** |
|
||||
| Low | 23 |
|
||||
| **Total** | **27** |
|
||||
|
||||
## Module Status
|
||||
|
||||
@@ -59,11 +59,11 @@ module file and counted in **Total**.
|
||||
| [DeploymentManager](DeploymentManager/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/1 | 1 | 14 |
|
||||
| [ExternalSystemGateway](ExternalSystemGateway/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/1 | 1 | 14 |
|
||||
| [HealthMonitoring](HealthMonitoring/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 12 |
|
||||
| [Host](Host/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/7 | 8 | 11 |
|
||||
| [InboundAPI](InboundAPI/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/5 | 6 | 13 |
|
||||
| [ManagementService](ManagementService/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/5 | 5 | 13 |
|
||||
| [NotificationService](NotificationService/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/3 | 3 | 13 |
|
||||
| [Security](Security/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/4 | 4 | 11 |
|
||||
| [Host](Host/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/0 | 1 | 11 |
|
||||
| [InboundAPI](InboundAPI/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/1 | 2 | 13 |
|
||||
| [ManagementService](ManagementService/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 13 |
|
||||
| [NotificationService](NotificationService/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 13 |
|
||||
| [Security](Security/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 11 |
|
||||
| [SiteEventLogging](SiteEventLogging/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/3 | 3 | 11 |
|
||||
| [SiteRuntime](SiteRuntime/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/5 | 5 | 16 |
|
||||
| [StoreAndForward](StoreAndForward/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/7 | 7 | 14 |
|
||||
@@ -93,37 +93,14 @@ _None open._
|
||||
| Host-002 | [Host](Host/findings.md) | Akka.Persistence required by REQ-HOST-6 is not configured and not used |
|
||||
| InboundAPI-007 | [InboundAPI](InboundAPI/findings.md) | `Database.Connection()` script API from the design doc is not implemented |
|
||||
|
||||
### Low (46)
|
||||
### Low (23)
|
||||
|
||||
| ID | Module | Title |
|
||||
|----|--------|-------|
|
||||
| Commons-008 | [Commons](Commons/findings.md) | `SetConnectionBindingsCommand` uses `ValueTuple` in a wire message contract |
|
||||
| DeploymentManager-013 | [DeploymentManager](DeploymentManager/findings.md) | SMTP credentials serialized and broadcast to all sites |
|
||||
| ExternalSystemGateway-011 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | Every call performs a full repository scan of all systems and methods |
|
||||
| Host-005 | [Host](Host/findings.md) | Blocking sync-over-async (`GetAwaiter().GetResult()`) inside `StartAsync` |
|
||||
| Host-006 | [Host](Host/findings.md) | HOCON assembled by unescaped string interpolation |
|
||||
| Host-007 | [Host](Host/findings.md) | REQ-HOST-4 rule "GrpcPort ≠ RemotingPort" is not enforced |
|
||||
| Host-008 | [Host](Host/findings.md) | `MachineDataDb` is validated and declared but never consumed |
|
||||
| Host-009 | [Host](Host/findings.md) | `StartAsync` reports success before role actors are confirmed running |
|
||||
| Host-010 | [Host](Host/findings.md) | No retry/backoff around startup preconditions (DB migration, readiness) |
|
||||
| Host-011 | [Host](Host/findings.md) | `LoggingOptions.MinimumLevel` is dead configuration |
|
||||
| InboundAPI-009 | [InboundAPI](InboundAPI/findings.md) | Failed compilation is retried on every subsequent request |
|
||||
| InboundAPI-010 | [InboundAPI](InboundAPI/findings.md) | `ParameterValidator` ignores extra body fields and cannot validate Object/List element types |
|
||||
| InboundAPI-011 | [InboundAPI](InboundAPI/findings.md) | Method-existence check leaks to unapproved callers (enumeration oracle) |
|
||||
| InboundAPI-012 | [InboundAPI](InboundAPI/findings.md) | `ParameterDefinition` POCO declared in the component project, not Commons |
|
||||
| InboundAPI-013 | [InboundAPI](InboundAPI/findings.md) | `ApiKeyValidationResult.NotFound` factory returns HTTP 400, contradicting its name |
|
||||
| ManagementService-005 | [ManagementService](ManagementService/findings.md) | ManagementActor declares no supervision strategy |
|
||||
| ManagementService-008 | [ManagementService](ManagementService/findings.md) | HandleResolveRoles constructs RoleMapper manually instead of via DI |
|
||||
| ManagementService-010 | [ManagementService](ManagementService/findings.md) | ManagementServiceOptions.CommandTimeout is defined but never used |
|
||||
| ManagementService-011 | [ManagementService](ManagementService/findings.md) | ResolveRolesCommand dispatch path is stale dead code |
|
||||
| ManagementService-012 | [ManagementService](ManagementService/findings.md) | ManagementEnvelope carries a loosely-typed object payload |
|
||||
| NotificationService-010 | [NotificationService](NotificationService/findings.md) | `DeliverAsync` does not disconnect the SMTP client on failure |
|
||||
| NotificationService-011 | [NotificationService](NotificationService/findings.md) | `SmtpPermanentException` declared in the wrong file; module conventions |
|
||||
| NotificationService-012 | [NotificationService](NotificationService/findings.md) | Test coverage gaps: OAuth2 delivery path, permanent-classification fallback, token-cache concurrency |
|
||||
| Security-008 | [Security](Security/findings.md) | N+1 query loading site-scope rules in `RoleMapper` |
|
||||
| Security-009 | [Security](Security/findings.md) | CancellationToken not honored inside `Task.Run` LDAP calls |
|
||||
| Security-010 | [Security](Security/findings.md) | Design doc contradicts itself on Windows Integrated Authentication |
|
||||
| Security-011 | [Security](Security/findings.md) | Missing tests for security-critical paths |
|
||||
| SiteEventLogging-006 | [SiteEventLogging](SiteEventLogging/findings.md) | Missing indexes for severity and keyword-search query paths |
|
||||
| SiteEventLogging-009 | [SiteEventLogging](SiteEventLogging/findings.md) | XML doc on `LogEventAsync` claims asynchronous behaviour |
|
||||
| SiteEventLogging-011 | [SiteEventLogging](SiteEventLogging/findings.md) | Stale "Phase 4+" placeholder in `ServiceCollectionExtensions` |
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 4 |
|
||||
| Open findings | 0 (1 deferred — Security-008) |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -315,7 +315,7 @@ Security-side defect — the reset-on-refresh bug — is fully fixed here. Regre
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Status | Deferred |
|
||||
| Location | `src/ScadaLink.Security/RoleMapper.cs:25-48` |
|
||||
|
||||
**Description**
|
||||
@@ -333,7 +333,18 @@ Add a repository method that loads scope rules for a set of mapping IDs in one q
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Deferred 2026-05-16 (commit `pending commit`). Finding confirmed accurate: `RoleMapper.MapGroupsToRolesAsync`
|
||||
issues one `GetScopeRulesForMappingAsync` round-trip per matched Deployment mapping — a
|
||||
genuine N+1 on the login / 15-minute-refresh path. However, the only correct fix
|
||||
(a batch `GetScopeRulesForMappingsAsync(IEnumerable<int>)` repository method, or an
|
||||
eager-load navigation property) requires changes to `ISecurityRepository`
|
||||
(`src/ScadaLink.Commons`) and `SecurityRepository` (`src/ScadaLink.ConfigurationDatabase`).
|
||||
Both are outside the Security module's permitted edit scope for this review pass, and the
|
||||
existing `ISecurityRepository` surface offers no per-set scope-rule query, so the N+1
|
||||
cannot be removed from within `RoleMapper.cs` alone. Severity is Low (bounded by the
|
||||
number of site-scoped Deployment groups, typically small). Deferred to a cross-module
|
||||
change that adds the batch repository method; `RoleMapper` should then resolve all scope
|
||||
rules in a single call.
|
||||
|
||||
### Security-009 — CancellationToken not honored inside `Task.Run` LDAP calls
|
||||
|
||||
@@ -341,7 +352,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.Security/LdapAuthService.cs:42`, `:46`, `:51`, `:56-57`, `:67-73`, `:135`, `:139-145` |
|
||||
|
||||
**Description**
|
||||
@@ -361,7 +372,17 @@ work-item scheduling, or implement a timeout-with-disconnect fallback.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending commit`). Confirmed: the synchronous Novell LDAP
|
||||
calls were wrapped in `Task.Run(..., ct)` where `ct` only prevents the work item from
|
||||
starting and cannot interrupt an in-progress blocking `Connect`/`Bind`/`Search`, and no
|
||||
network/operation timeout was configured on the `LdapConnection`. Added a configurable
|
||||
`SecurityOptions.LdapConnectionTimeoutMs` (default 10s) and a new `ApplyConnectionTimeout`
|
||||
helper that sets both `LdapConnection.ConnectionTimeout` (socket connect) and
|
||||
`LdapConstraints.TimeLimit` (per-operation limit) before connecting, so a hung LDAP
|
||||
server can no longer pin a thread-pool thread indefinitely. The `ct`-only-guards-scheduling
|
||||
limitation is now documented in the option's XML doc and an inline comment. Regression
|
||||
tests `SecurityOptions_LdapConnectionTimeout_HasSaneDefault` and
|
||||
`AuthenticateAsync_UnreachableHost_FailsWithinConfiguredTimeout`.
|
||||
|
||||
### Security-010 — Design doc contradicts itself on Windows Integrated Authentication
|
||||
|
||||
@@ -369,7 +390,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `docs/requirements/Component-Security.md:13` (vs. `:23`) |
|
||||
|
||||
**Description**
|
||||
@@ -388,7 +409,14 @@ to match the implemented behavior and the rest of the document.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending commit`). Confirmed: the Responsibilities bullet at
|
||||
line 13 said "using Windows Integrated Authentication", directly contradicting the
|
||||
Authentication section's "No Windows Integrated Authentication ... authenticates directly
|
||||
against LDAP/AD, not via Kerberos/NTLM", CLAUDE.md, and the implementation (`LdapAuthService`
|
||||
performs a direct username/password bind). Documentation-only finding — no regression test
|
||||
is meaningful. Reworded the Responsibilities bullet to "Authenticate users against
|
||||
LDAP/Active Directory using a direct LDAP/AD bind (username/password)", matching the rest
|
||||
of the document and the code.
|
||||
|
||||
### Security-011 — Missing tests for security-critical paths
|
||||
|
||||
@@ -396,7 +424,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `tests/ScadaLink.Security.Tests/UnitTest1.cs` |
|
||||
|
||||
**Description**
|
||||
@@ -417,4 +445,14 @@ DN-escaping of hostile usernames, and idle-timeout behavior across a refresh. Re
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending commit`). Re-triage: most of the listed gaps were
|
||||
already closed by the Security-001..007 fixes — the regression classes
|
||||
`SecurityReviewRegressionTests` (StartTLS path, JWT empty/short-key rejection) and
|
||||
`SecurityReviewRegressionTests2` (DN-injection / hostile-username escaping, `uid`/`cn`
|
||||
fallback consistency, idle-timeout preserved across refresh) cover them. The finding's
|
||||
reference to a `SecurityHardeningTests` class is stale — no such class exists in the
|
||||
suite. Remaining gaps closed here: renamed the test file `UnitTest1.cs` →
|
||||
`SecurityTests.cs`, and added a new `SecurityReviewRegressionTests3` class with the
|
||||
LDAP-timeout coverage (Security-009) plus extra no-service-account / DN-path edge cases
|
||||
(`BuildFallbackUserDn_NoSearchBase_ReturnsBareRdn`, `EscapeLdapDn_LeadingHash_IsEscaped`,
|
||||
`EscapeLdapDn_NullOrEmpty_ReturnedUnchanged`). Full module suite: 54 tests, all green.
|
||||
|
||||
@@ -10,7 +10,7 @@ Central cluster. Sites do not have user-facing interfaces and do not perform ind
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- Authenticate users against LDAP/Active Directory using Windows Integrated Authentication.
|
||||
- Authenticate users against LDAP/Active Directory using a direct LDAP/AD bind (username/password).
|
||||
- Map LDAP group memberships to system roles.
|
||||
- Enforce role-based access control on all API and UI operations.
|
||||
- Support site-scoped permissions for the Deployment role.
|
||||
|
||||
@@ -54,58 +54,20 @@ public class AkkaHostedService : IHostedService
|
||||
/// </summary>
|
||||
public ActorSystem? ActorSystem => _actorSystem;
|
||||
|
||||
public Task StartAsync(CancellationToken cancellationToken)
|
||||
public async Task StartAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var seedNodesStr = string.Join(",",
|
||||
_clusterOptions.SeedNodes.Select(s => $"\"{s}\""));
|
||||
|
||||
// For site nodes, include a site-specific role (e.g., "site-SiteA") alongside the base role
|
||||
var roles = BuildRoles();
|
||||
var rolesStr = string.Join(",", roles.Select(r => $"\"{r}\""));
|
||||
|
||||
// WP-3: Transport heartbeat explicitly configured from CommunicationOptions (not framework defaults)
|
||||
var transportHeartbeatSec = _communicationOptions.TransportHeartbeatInterval.TotalSeconds;
|
||||
var transportFailureSec = _communicationOptions.TransportFailureThreshold.TotalSeconds;
|
||||
|
||||
var hocon = $@"
|
||||
akka {{
|
||||
extensions = [
|
||||
""Akka.Cluster.Tools.PublishSubscribe.DistributedPubSubExtensionProvider, Akka.Cluster.Tools""
|
||||
]
|
||||
actor {{
|
||||
provider = cluster
|
||||
}}
|
||||
remote {{
|
||||
dot-netty.tcp {{
|
||||
hostname = ""{_nodeOptions.NodeHostname}""
|
||||
port = {_nodeOptions.RemotingPort}
|
||||
}}
|
||||
transport-failure-detector {{
|
||||
heartbeat-interval = {transportHeartbeatSec:F0}s
|
||||
acceptable-heartbeat-pause = {transportFailureSec:F0}s
|
||||
}}
|
||||
}}
|
||||
cluster {{
|
||||
seed-nodes = [{seedNodesStr}]
|
||||
roles = [{rolesStr}]
|
||||
min-nr-of-members = {_clusterOptions.MinNrOfMembers}
|
||||
split-brain-resolver {{
|
||||
active-strategy = {_clusterOptions.SplitBrainResolverStrategy}
|
||||
stable-after = {_clusterOptions.StableAfter.TotalSeconds:F0}s
|
||||
keep-oldest {{
|
||||
down-if-alone = on
|
||||
}}
|
||||
}}
|
||||
failure-detector {{
|
||||
heartbeat-interval = {_clusterOptions.HeartbeatInterval.TotalSeconds:F0}s
|
||||
acceptable-heartbeat-pause = {_clusterOptions.FailureDetectionThreshold.TotalSeconds:F0}s
|
||||
}}
|
||||
run-coordinated-shutdown-when-down = on
|
||||
}}
|
||||
coordinated-shutdown {{
|
||||
run-by-clr-shutdown-hook = on
|
||||
}}
|
||||
}}";
|
||||
// Host-006: HOCON is assembled in a dedicated builder that quotes/escapes every
|
||||
// interpolated value, so a hostname, seed node or strategy containing a quote,
|
||||
// backslash or whitespace cannot corrupt the configuration document.
|
||||
var hocon = BuildHocon(_nodeOptions, _clusterOptions, roles,
|
||||
transportHeartbeatSec, transportFailureSec);
|
||||
|
||||
var config = ConfigurationFactory.ParseString(hocon);
|
||||
_actorSystem = ActorSystem.Create("scadalink", config);
|
||||
@@ -135,10 +97,78 @@ akka {{
|
||||
}
|
||||
else if (_nodeOptions.Role.Equals("Site", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
RegisterSiteActors();
|
||||
await RegisterSiteActorsAsync(cancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
return Task.CompletedTask;
|
||||
/// <summary>
|
||||
/// Builds the Akka HOCON configuration document. Every interpolated value is
|
||||
/// routed through <see cref="QuoteHocon"/> (string values) so a hostname,
|
||||
/// seed-node URI, role or split-brain strategy containing a quote, backslash or
|
||||
/// whitespace cannot corrupt the document or be silently misparsed (Host-006).
|
||||
/// </summary>
|
||||
public static string BuildHocon(
|
||||
NodeOptions nodeOptions,
|
||||
ClusterOptions clusterOptions,
|
||||
IEnumerable<string> roles,
|
||||
double transportHeartbeatSec,
|
||||
double transportFailureSec)
|
||||
{
|
||||
var seedNodesStr = string.Join(",",
|
||||
clusterOptions.SeedNodes.Select(QuoteHocon));
|
||||
var rolesStr = string.Join(",", roles.Select(QuoteHocon));
|
||||
|
||||
return $@"
|
||||
akka {{
|
||||
extensions = [
|
||||
""Akka.Cluster.Tools.PublishSubscribe.DistributedPubSubExtensionProvider, Akka.Cluster.Tools""
|
||||
]
|
||||
actor {{
|
||||
provider = cluster
|
||||
}}
|
||||
remote {{
|
||||
dot-netty.tcp {{
|
||||
hostname = {QuoteHocon(nodeOptions.NodeHostname)}
|
||||
port = {nodeOptions.RemotingPort}
|
||||
}}
|
||||
transport-failure-detector {{
|
||||
heartbeat-interval = {transportHeartbeatSec:F0}s
|
||||
acceptable-heartbeat-pause = {transportFailureSec:F0}s
|
||||
}}
|
||||
}}
|
||||
cluster {{
|
||||
seed-nodes = [{seedNodesStr}]
|
||||
roles = [{rolesStr}]
|
||||
min-nr-of-members = {clusterOptions.MinNrOfMembers}
|
||||
split-brain-resolver {{
|
||||
active-strategy = {QuoteHocon(clusterOptions.SplitBrainResolverStrategy)}
|
||||
stable-after = {clusterOptions.StableAfter.TotalSeconds:F0}s
|
||||
keep-oldest {{
|
||||
down-if-alone = on
|
||||
}}
|
||||
}}
|
||||
failure-detector {{
|
||||
heartbeat-interval = {clusterOptions.HeartbeatInterval.TotalSeconds:F0}s
|
||||
acceptable-heartbeat-pause = {clusterOptions.FailureDetectionThreshold.TotalSeconds:F0}s
|
||||
}}
|
||||
run-coordinated-shutdown-when-down = on
|
||||
}}
|
||||
coordinated-shutdown {{
|
||||
run-by-clr-shutdown-hook = on
|
||||
}}
|
||||
}}";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Renders a value as a HOCON double-quoted string, escaping backslashes and
|
||||
/// double quotes so the resulting token cannot break out of its string literal.
|
||||
/// </summary>
|
||||
private static string QuoteHocon(string? value)
|
||||
{
|
||||
var escaped = (value ?? string.Empty)
|
||||
.Replace("\\", "\\\\")
|
||||
.Replace("\"", "\\\"");
|
||||
return $"\"{escaped}\"";
|
||||
}
|
||||
|
||||
public async Task StopAsync(CancellationToken cancellationToken)
|
||||
@@ -218,7 +248,7 @@ akka {{
|
||||
/// The singleton is scoped to the site-specific cluster role so it runs on exactly
|
||||
/// one node within this site's cluster.
|
||||
/// </summary>
|
||||
private void RegisterSiteActors()
|
||||
private async Task RegisterSiteActorsAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var siteRole = $"site-{_nodeOptions.SiteId}";
|
||||
var storage = _serviceProvider.GetRequiredService<SiteStorageService>();
|
||||
@@ -341,8 +371,11 @@ akka {{
|
||||
if (storeAndForwardService != null)
|
||||
{
|
||||
// Initialize SQLite schema and start the retry timer. Must complete before
|
||||
// any actor or HTTP handler touches the service.
|
||||
storeAndForwardService.StartAsync().GetAwaiter().GetResult();
|
||||
// any actor or HTTP handler touches the service. Host-005: awaited rather
|
||||
// than blocked via GetAwaiter().GetResult() — no thread-pool starvation /
|
||||
// sync-context deadlock risk, and exceptions surface as their original type.
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
await storeAndForwardService.StartAsync();
|
||||
|
||||
// Register the store-and-forward delivery handlers so buffered
|
||||
// ExternalSystem calls, cached DB writes and notifications are actually
|
||||
@@ -413,7 +446,22 @@ akka {{
|
||||
contacts.Count, _nodeOptions.SiteId);
|
||||
}
|
||||
|
||||
// Gate gRPC subscriptions until the actor system and SiteStreamManager are initialized
|
||||
// Gate gRPC subscriptions until the actor system and SiteStreamManager are
|
||||
// initialized (REQ-HOST-7).
|
||||
//
|
||||
// Host-009: SetReady asserts a deliberately narrow contract. By this point the
|
||||
// actor system exists, SiteStreamManager.Initialize has run, and every
|
||||
// role actor (SiteCommunicationActor, deployment-manager singleton,
|
||||
// SiteReplicationActor, the ClusterClient) has been created with ActorOf —
|
||||
// creation and the registration Tells are synchronous and strictly ordered.
|
||||
// What is NOT guaranteed is completion of each actor's PreStart or the
|
||||
// ClusterClient's initial-contact handshake with central: those are
|
||||
// intentionally asynchronous. Gating readiness on the central handshake would
|
||||
// be wrong — a site must come up and stream locally even while central is
|
||||
// briefly unreachable. gRPC readiness therefore guarantees "the site actor
|
||||
// graph exists and can accept subscription streams", not "the cluster
|
||||
// handshake has completed". Streams opened before SetReady are already
|
||||
// rejected by SiteStreamGrpcServer with StatusCode.Unavailable.
|
||||
var grpcServer = _serviceProvider.GetService<ScadaLink.Communication.Grpc.SiteStreamGrpcServer>();
|
||||
grpcServer?.SetReady(_actorSystem!);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,5 @@ namespace ScadaLink.Host;
|
||||
public class DatabaseOptions
|
||||
{
|
||||
public string? ConfigurationDb { get; set; }
|
||||
public string? MachineDataDb { get; set; }
|
||||
public string? SiteDbPath { get; set; }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
using Serilog;
|
||||
using Serilog.Events;
|
||||
|
||||
namespace ScadaLink.Host;
|
||||
|
||||
/// <summary>
|
||||
/// Builds the Serilog <see cref="LoggerConfiguration"/> for the Host process.
|
||||
///
|
||||
/// REQ-HOST-8 / Host-011: the configured minimum level comes from
|
||||
/// <c>ScadaLink:Logging:MinimumLevel</c> (bound to <see cref="LoggingOptions"/>) so an
|
||||
/// operator editing that key changes the effective log level. The standard
|
||||
/// <c>Serilog</c> configuration section is still read (via
|
||||
/// <see cref="Serilog.Configuration.ConfigurationLoggerConfigurationExtensions"/>)
|
||||
/// for sink/override customisation; the explicit <c>MinimumLevel.Is</c> below pins
|
||||
/// the floor from <see cref="LoggingOptions"/>.
|
||||
/// </summary>
|
||||
public static class LoggerConfigurationFactory
|
||||
{
|
||||
public static LoggerConfiguration Build(
|
||||
IConfiguration configuration,
|
||||
string nodeRole,
|
||||
string siteId,
|
||||
string nodeHostname)
|
||||
{
|
||||
var loggingOptions = new LoggingOptions();
|
||||
configuration.GetSection("ScadaLink:Logging").Bind(loggingOptions);
|
||||
|
||||
var minimumLevel = ParseLevel(loggingOptions.MinimumLevel);
|
||||
|
||||
return new LoggerConfiguration()
|
||||
.ReadFrom.Configuration(configuration)
|
||||
.MinimumLevel.Is(minimumLevel)
|
||||
.Enrich.WithProperty("SiteId", siteId)
|
||||
.Enrich.WithProperty("NodeHostname", nodeHostname)
|
||||
.Enrich.WithProperty("NodeRole", nodeRole);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parses a Serilog <see cref="LogEventLevel"/> name, falling back to
|
||||
/// <see cref="LogEventLevel.Information"/> for null/blank/unrecognised values.
|
||||
/// </summary>
|
||||
private static LogEventLevel ParseLevel(string? level)
|
||||
{
|
||||
return Enum.TryParse<LogEventLevel>(level, ignoreCase: true, out var parsed)
|
||||
? parsed
|
||||
: LogEventLevel.Information;
|
||||
}
|
||||
}
|
||||
@@ -38,12 +38,10 @@ var nodeRole = configuration["ScadaLink:Node:Role"]!;
|
||||
var nodeHostname = configuration["ScadaLink:Node:NodeHostname"] ?? "unknown";
|
||||
var siteId = configuration["ScadaLink:Node:SiteId"] ?? "central";
|
||||
|
||||
// WP-14: Serilog structured logging
|
||||
Log.Logger = new LoggerConfiguration()
|
||||
.ReadFrom.Configuration(configuration)
|
||||
.Enrich.WithProperty("SiteId", siteId)
|
||||
.Enrich.WithProperty("NodeHostname", nodeHostname)
|
||||
.Enrich.WithProperty("NodeRole", nodeRole)
|
||||
// WP-14: Serilog structured logging.
|
||||
// Host-011: minimum level is driven by ScadaLink:Logging:MinimumLevel (LoggingOptions).
|
||||
Log.Logger = ScadaLink.Host.LoggerConfigurationFactory
|
||||
.Build(configuration, nodeRole, siteId, nodeHostname)
|
||||
.WriteTo.Console(outputTemplate:
|
||||
"[{Timestamp:HH:mm:ss} {Level:u3}] [{NodeRole}/{NodeHostname}] {Message:lj}{NewLine}{Exception}")
|
||||
.WriteTo.File("logs/scadalink-.log", rollingInterval: Serilog.RollingInterval.Day)
|
||||
@@ -116,14 +114,24 @@ try
|
||||
{
|
||||
var isDevelopment = app.Environment.IsDevelopment()
|
||||
|| string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Development", StringComparison.OrdinalIgnoreCase);
|
||||
using (var scope = app.Services.CreateScope())
|
||||
{
|
||||
var dbContext = scope.ServiceProvider.GetRequiredService<ScadaLinkDbContext>();
|
||||
var migrationLogger = scope.ServiceProvider
|
||||
.GetRequiredService<ILoggerFactory>()
|
||||
.CreateLogger(typeof(MigrationHelper).FullName!);
|
||||
await MigrationHelper.ApplyOrValidateMigrationsAsync(dbContext, isDevelopment, migrationLogger);
|
||||
}
|
||||
var migrationLogger = app.Services
|
||||
.GetRequiredService<ILoggerFactory>()
|
||||
.CreateLogger(typeof(MigrationHelper).FullName!);
|
||||
|
||||
// Host-010: tolerate a database that is briefly unreachable at boot
|
||||
// (e.g. app and DB containers starting together) with a bounded
|
||||
// exponential backoff before failing fatally.
|
||||
await StartupRetry.ExecuteWithRetryAsync(
|
||||
"database-migration",
|
||||
async () =>
|
||||
{
|
||||
using var scope = app.Services.CreateScope();
|
||||
var dbContext = scope.ServiceProvider.GetRequiredService<ScadaLinkDbContext>();
|
||||
await MigrationHelper.ApplyOrValidateMigrationsAsync(dbContext, isDevelopment, migrationLogger);
|
||||
},
|
||||
maxAttempts: 8,
|
||||
initialDelay: TimeSpan.FromSeconds(2),
|
||||
migrationLogger);
|
||||
}
|
||||
|
||||
// Middleware pipeline
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
namespace ScadaLink.Host;
|
||||
|
||||
/// <summary>
|
||||
/// Bounded retry-with-backoff for startup preconditions.
|
||||
///
|
||||
/// Host-010 / REQ-HOST-4a: a Central node applies/validates database migrations
|
||||
/// before the host begins serving traffic. In container orchestration the database
|
||||
/// and the app frequently start together, so the database may be briefly
|
||||
/// unreachable. Rather than crashing the process on the first connection failure,
|
||||
/// the migration step is wrapped in this bounded exponential backoff: it tolerates a
|
||||
/// short outage and only fails fatally once attempts are exhausted.
|
||||
/// </summary>
|
||||
public static class StartupRetry
|
||||
{
|
||||
public static async Task ExecuteWithRetryAsync(
|
||||
string operationName,
|
||||
Func<Task> operation,
|
||||
int maxAttempts,
|
||||
TimeSpan initialDelay,
|
||||
ILogger logger,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
var delay = initialDelay;
|
||||
for (var attempt = 1; ; attempt++)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
try
|
||||
{
|
||||
await operation();
|
||||
if (attempt > 1)
|
||||
logger.LogInformation(
|
||||
"Startup operation '{Operation}' succeeded on attempt {Attempt}.",
|
||||
operationName, attempt);
|
||||
return;
|
||||
}
|
||||
catch (Exception ex) when (attempt < maxAttempts)
|
||||
{
|
||||
logger.LogWarning(ex,
|
||||
"Startup operation '{Operation}' failed on attempt {Attempt}/{MaxAttempts}; " +
|
||||
"retrying in {Delay}.",
|
||||
operationName, attempt, maxAttempts, delay);
|
||||
await Task.Delay(delay, cancellationToken);
|
||||
// Exponential backoff, capped so the total wait stays bounded.
|
||||
delay = TimeSpan.FromTicks(Math.Min(delay.Ticks * 2, TimeSpan.FromSeconds(30).Ticks));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -30,8 +30,6 @@ public static class StartupValidator
|
||||
var dbSection = configuration.GetSection("ScadaLink:Database");
|
||||
if (string.IsNullOrEmpty(dbSection["ConfigurationDb"]))
|
||||
errors.Add("ScadaLink:Database:ConfigurationDb connection string required for Central");
|
||||
if (string.IsNullOrEmpty(dbSection["MachineDataDb"]))
|
||||
errors.Add("ScadaLink:Database:MachineDataDb connection string required for Central");
|
||||
|
||||
var secSection = configuration.GetSection("ScadaLink:Security");
|
||||
if (string.IsNullOrEmpty(secSection["LdapServer"]))
|
||||
@@ -51,6 +49,13 @@ public static class StartupValidator
|
||||
if (grpcPortStr != null && (!int.TryParse(grpcPortStr, out grpcPort) || grpcPort < 1 || grpcPort > 65535))
|
||||
errors.Add("ScadaLink:Node:GrpcPort must be 1-65535");
|
||||
|
||||
// Host-007 / REQ-HOST-4: the gRPC (Kestrel HTTP/2) port and the Akka
|
||||
// remoting port must differ. Identical values make Kestrel and
|
||||
// Akka.Remote contend for the same TCP port and fail opaquely at
|
||||
// runtime. Uses the resolved GrpcPort, including the 8083 default.
|
||||
if (port == grpcPort)
|
||||
errors.Add("ScadaLink:Node:GrpcPort must differ from RemotingPort");
|
||||
|
||||
var dbSection = configuration.GetSection("ScadaLink:Database");
|
||||
if (string.IsNullOrEmpty(dbSection["SiteDbPath"]))
|
||||
errors.Add("ScadaLink:Database:SiteDbPath required for Site nodes");
|
||||
|
||||
@@ -16,10 +16,9 @@
|
||||
"FailureDetectionThreshold": "00:00:10",
|
||||
"MinNrOfMembers": 1
|
||||
},
|
||||
"_secrets": "Host-003: Secrets are NOT committed in this file. Supply them via environment variables, which the Host's configuration builder (AddEnvironmentVariables) overlays over this file. Required: ScadaLink__Database__ConfigurationDb, ScadaLink__Database__MachineDataDb, ScadaLink__Security__LdapServiceAccountPassword, ScadaLink__Security__JwtSigningKey. The ${...} placeholders below are intentionally non-functional and must be overridden per environment.",
|
||||
"_secrets": "Host-003: Secrets are NOT committed in this file. Supply them via environment variables, which the Host's configuration builder (AddEnvironmentVariables) overlays over this file. Required: ScadaLink__Database__ConfigurationDb, ScadaLink__Security__LdapServiceAccountPassword, ScadaLink__Security__JwtSigningKey. The ${...} placeholders below are intentionally non-functional and must be overridden per environment.",
|
||||
"Database": {
|
||||
"ConfigurationDb": "${SCADALINK_CONFIGURATIONDB_CONNECTION_STRING}",
|
||||
"MachineDataDb": "${SCADALINK_MACHINEDATADB_CONNECTION_STRING}"
|
||||
"ConfigurationDb": "${SCADALINK_CONFIGURATIONDB_CONNECTION_STRING}"
|
||||
},
|
||||
"Security": {
|
||||
"LdapServer": "localhost",
|
||||
|
||||
@@ -13,6 +13,10 @@ public class ApiKeyValidator
|
||||
{
|
||||
private readonly IInboundApiRepository _repository;
|
||||
|
||||
// InboundAPI-011: the single message used for both "method not found" and
|
||||
// "key not approved" so the two outcomes are indistinguishable to the caller.
|
||||
private const string NotApprovedMessage = "API key not approved for this method";
|
||||
|
||||
public ApiKeyValidator(IInboundApiRepository repository)
|
||||
{
|
||||
_repository = repository;
|
||||
@@ -45,10 +49,15 @@ public class ApiKeyValidator
|
||||
return ApiKeyValidationResult.Unauthorized("Invalid or disabled API key");
|
||||
}
|
||||
|
||||
// InboundAPI-011: "method not found" and "key not approved" must produce an
|
||||
// indistinguishable response. Otherwise a caller holding any valid key could
|
||||
// enumerate which method names exist by observing the status/message
|
||||
// difference. Both cases return 403 with the identical message below, and the
|
||||
// caller-supplied method name is never echoed back into the response.
|
||||
var method = await _repository.GetMethodByNameAsync(methodName, cancellationToken);
|
||||
if (method == null)
|
||||
{
|
||||
return ApiKeyValidationResult.NotFound($"Method '{methodName}' not found");
|
||||
return ApiKeyValidationResult.Forbidden(NotApprovedMessage);
|
||||
}
|
||||
|
||||
// Check if this key is approved for the method
|
||||
@@ -57,7 +66,7 @@ public class ApiKeyValidator
|
||||
|
||||
if (!isApproved)
|
||||
{
|
||||
return ApiKeyValidationResult.Forbidden("API key not approved for this method");
|
||||
return ApiKeyValidationResult.Forbidden(NotApprovedMessage);
|
||||
}
|
||||
|
||||
return ApiKeyValidationResult.Valid(apiKey, method);
|
||||
@@ -108,7 +117,4 @@ public class ApiKeyValidationResult
|
||||
|
||||
public static ApiKeyValidationResult Forbidden(string message) =>
|
||||
new() { IsValid = false, StatusCode = 403, ErrorMessage = message };
|
||||
|
||||
public static ApiKeyValidationResult NotFound(string message) =>
|
||||
new() { IsValid = false, StatusCode = 400, ErrorMessage = message };
|
||||
}
|
||||
|
||||
@@ -21,6 +21,13 @@ public class InboundScriptExecutor
|
||||
// not safe for concurrent read/write, so a ConcurrentDictionary is used throughout.
|
||||
private readonly ConcurrentDictionary<string, Func<InboundScriptContext, Task<object?>>> _scriptHandlers = new();
|
||||
|
||||
// InboundAPI-009: a script that fails to compile (or violates the trust model)
|
||||
// is recorded here so it is compiled at most once. Without this, every subsequent
|
||||
// request for a broken method re-runs the expensive Roslyn compilation — a CPU
|
||||
// amplification vector since the inbound API has no rate limiting. The entry is
|
||||
// cleared whenever the method is (re)compiled via CompileAndRegister.
|
||||
private readonly ConcurrentDictionary<string, byte> _knownBadMethods = new();
|
||||
|
||||
private readonly IServiceProvider _serviceProvider;
|
||||
|
||||
public InboundScriptExecutor(ILogger<InboundScriptExecutor> logger, IServiceProvider serviceProvider)
|
||||
@@ -50,7 +57,21 @@ public class InboundScriptExecutor
|
||||
/// script is empty, fails Roslyn compilation, or violates the script trust model.
|
||||
/// </summary>
|
||||
public bool CompileAndRegister(ApiMethod method)
|
||||
=> Compile(method) is { } handler && Register(method.Name, handler);
|
||||
{
|
||||
var handler = Compile(method);
|
||||
if (handler == null)
|
||||
{
|
||||
// InboundAPI-009: record the failure so the lazy-compile path does not
|
||||
// keep recompiling a broken script on every request.
|
||||
_knownBadMethods[method.Name] = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
// The method definition was (re)compiled successfully — drop any stale
|
||||
// failure record so a fixed script is no longer treated as bad.
|
||||
_knownBadMethods.TryRemove(method.Name, out _);
|
||||
return Register(method.Name, handler);
|
||||
}
|
||||
|
||||
private bool Register(string methodName, Func<InboundScriptContext, Task<object?>> handler)
|
||||
{
|
||||
@@ -157,12 +178,21 @@ public class InboundScriptExecutor
|
||||
|
||||
if (!_scriptHandlers.TryGetValue(method.Name, out var handler))
|
||||
{
|
||||
// InboundAPI-009: a method already known to fail compilation must not
|
||||
// be recompiled on every request — short-circuit before Roslyn runs.
|
||||
if (_knownBadMethods.ContainsKey(method.Name))
|
||||
return new InboundScriptResult(false, null, "Script compilation failed for this method");
|
||||
|
||||
// Lazy compile on first request (handles methods created after startup).
|
||||
// Compile outside the cache so a failed compile is not stored, then add
|
||||
// atomically so concurrent first-callers share a single handler instance.
|
||||
var compiled = Compile(method);
|
||||
if (compiled == null)
|
||||
{
|
||||
// Cache the failure so the next request short-circuits above.
|
||||
_knownBadMethods[method.Name] = 0;
|
||||
return new InboundScriptResult(false, null, "Script compilation failed for this method");
|
||||
}
|
||||
handler = _scriptHandlers.GetOrAdd(method.Name, compiled);
|
||||
}
|
||||
|
||||
|
||||
@@ -61,6 +61,19 @@ public static class ParameterValidator
|
||||
var result = new Dictionary<string, object?>();
|
||||
var errors = new List<string>();
|
||||
|
||||
// InboundAPI-010: report top-level body fields that do not match any defined
|
||||
// parameter, so a caller learns about a typo'd parameter name instead of
|
||||
// having the field silently ignored.
|
||||
var defined = new HashSet<string>(definitions.Select(d => d.Name), StringComparer.Ordinal);
|
||||
var unexpected = body.Value.EnumerateObject()
|
||||
.Select(p => p.Name)
|
||||
.Where(name => !defined.Contains(name))
|
||||
.ToList();
|
||||
if (unexpected.Count > 0)
|
||||
{
|
||||
errors.Add($"Unexpected parameter(s): {string.Join(", ", unexpected)}");
|
||||
}
|
||||
|
||||
foreach (var def in definitions)
|
||||
{
|
||||
if (body.Value.TryGetProperty(def.Name, out var prop))
|
||||
@@ -89,6 +102,13 @@ public static class ParameterValidator
|
||||
return ParameterValidationResult.Valid(result);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Coerces a JSON element to the declared parameter type. InboundAPI-010: the
|
||||
/// <c>Object</c> and <c>List</c> extended types are validated for JSON <em>shape</em>
|
||||
/// only (object vs. array) — there is no field-level or element-level type
|
||||
/// validation. A method script that needs a specific nested structure must
|
||||
/// validate it itself; invalid nested data surfaces as a runtime script error.
|
||||
/// </summary>
|
||||
private static (object? value, string? error) CoerceValue(JsonElement element, string expectedType, string paramName)
|
||||
{
|
||||
return expectedType.ToLowerInvariant() switch
|
||||
|
||||
@@ -43,6 +43,22 @@ public class ManagementActor : ReceiveActor
|
||||
Receive<ManagementEnvelope>(HandleEnvelope);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds the supervision strategy for <see cref="ManagementActor"/>. Per the project's
|
||||
/// Akka.NET conventions, coordinator-style actors use a Resume-based strategy so a faulted
|
||||
/// child preserves its state rather than restarting. <see cref="ManagementActor"/> spawns
|
||||
/// no children today, but declaring the strategy explicitly matches the convention and
|
||||
/// makes the contract correct ahead of any future worker actors (finding
|
||||
/// ManagementService-005).
|
||||
/// </summary>
|
||||
public static SupervisorStrategy CreateSupervisorStrategy() =>
|
||||
new OneForOneStrategy(
|
||||
maxNrOfRetries: -1,
|
||||
withinTimeRange: System.Threading.Timeout.InfiniteTimeSpan,
|
||||
decider: Decider.From(_ => Directive.Resume));
|
||||
|
||||
protected override SupervisorStrategy SupervisorStrategy() => CreateSupervisorStrategy();
|
||||
|
||||
/// <summary>
|
||||
/// Serializer settings for command results. <see cref="ReferenceHandler.IgnoreCycles"/>
|
||||
/// keeps EF-backed entity graphs with bidirectional navigation properties from throwing;
|
||||
@@ -304,29 +320,16 @@ public class ManagementActor : ReceiveActor
|
||||
DiscardParkedMessageCommand cmd => await HandleDiscardParkedMessage(sp, cmd, user),
|
||||
DebugSnapshotCommand cmd => await HandleDebugSnapshot(sp, cmd, user),
|
||||
|
||||
// Role resolution (for CLI LDAP auth)
|
||||
ResolveRolesCommand cmd => await HandleResolveRoles(sp, cmd),
|
||||
|
||||
// NOTE: ResolveRolesCommand is intentionally NOT dispatched. The two-step
|
||||
// "ResolveRoles + command" flow is retired — the HTTP endpoint performs LDAP
|
||||
// auth and role resolution itself before sending a single envelope. Leaving a
|
||||
// handler would expose role-mapping data to any raw ClusterClient sender with
|
||||
// no role requirement; the command now falls through to the default below
|
||||
// (finding ManagementService-011).
|
||||
_ => throw new NotSupportedException($"Unknown command type: {command.GetType().Name}")
|
||||
};
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Role resolution
|
||||
// ========================================================================
|
||||
|
||||
private static async Task<object?> HandleResolveRoles(IServiceProvider sp, ResolveRolesCommand cmd)
|
||||
{
|
||||
var roleMapper = new RoleMapper(sp.GetRequiredService<ISecurityRepository>());
|
||||
var result = await roleMapper.MapGroupsToRolesAsync(cmd.LdapGroups);
|
||||
return new
|
||||
{
|
||||
Roles = result.Roles,
|
||||
PermittedSiteIds = result.PermittedSiteIds,
|
||||
IsSystemWideDeployment = result.IsSystemWideDeployment
|
||||
};
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Site-scope enforcement
|
||||
// ========================================================================
|
||||
|
||||
@@ -6,6 +6,7 @@ using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Messages.Management;
|
||||
using ScadaLink.Security;
|
||||
|
||||
@@ -13,7 +14,20 @@ namespace ScadaLink.ManagementService;
|
||||
|
||||
public static class ManagementEndpoints
|
||||
{
|
||||
private static readonly TimeSpan AskTimeout = TimeSpan.FromSeconds(30);
|
||||
private static readonly TimeSpan DefaultAskTimeout = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the ManagementActor Ask timeout from configuration
|
||||
/// (finding ManagementService-010). Falls back to <see cref="DefaultAskTimeout"/>
|
||||
/// when options are absent or the configured value is not strictly positive — a
|
||||
/// zero/negative timeout would make every management call fail immediately.
|
||||
/// </summary>
|
||||
public static TimeSpan ResolveAskTimeout(ManagementServiceOptions? options)
|
||||
{
|
||||
if (options is { CommandTimeout: { Ticks: > 0 } configured })
|
||||
return configured;
|
||||
return DefaultAskTimeout;
|
||||
}
|
||||
|
||||
public static IEndpointRouteBuilder MapManagementAPI(this IEndpointRouteBuilder endpoints)
|
||||
{
|
||||
@@ -101,10 +115,13 @@ public static class ManagementEndpoints
|
||||
var correlationId = Guid.NewGuid().ToString("N");
|
||||
var envelope = new ManagementEnvelope(authenticatedUser, command, correlationId);
|
||||
|
||||
var askTimeout = ResolveAskTimeout(
|
||||
context.RequestServices.GetService<IOptions<ManagementServiceOptions>>()?.Value);
|
||||
|
||||
object response;
|
||||
try
|
||||
{
|
||||
response = await holder.ActorRef.Ask(envelope, AskTimeout);
|
||||
response = await holder.ActorRef.Ask(envelope, askTimeout);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
|
||||
@@ -315,8 +315,6 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
|
||||
var bccAddresses = recipients.Select(r => r.EmailAddress).ToList();
|
||||
await smtp.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken);
|
||||
|
||||
await smtp.DisconnectAsync(cancellationToken);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
@@ -334,6 +332,23 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
// catch filters (SmtpPermanentException / IsTransientSmtpError) handle them.
|
||||
finally
|
||||
{
|
||||
// NS-010: always tear the connection down, regardless of outcome. The
|
||||
// SMTP QUIT used to run only on the success path inside the try block,
|
||||
// so a failed Connect/Authenticate/Send left an open, authenticated
|
||||
// connection until finalization reclaimed the socket — exhausting the
|
||||
// server's connection slots under sustained transient failures.
|
||||
// Disconnect is best-effort: a disconnect failure (e.g. the connection
|
||||
// is already dead) must not mask the original delivery exception.
|
||||
try
|
||||
{
|
||||
await smtp.DisconnectAsync(cancellationToken);
|
||||
}
|
||||
catch (Exception disconnectEx)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Ignoring SMTP disconnect failure during cleanup: {Reason}", disconnectEx.Message);
|
||||
}
|
||||
|
||||
// NS-007: always release the concurrency slot, even on failure.
|
||||
limiter.Release();
|
||||
}
|
||||
@@ -399,12 +414,3 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
return ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Transient;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Signals a permanent SMTP failure (5xx) that should not be retried.
|
||||
/// </summary>
|
||||
public class SmtpPermanentException : Exception
|
||||
{
|
||||
public SmtpPermanentException(string message, Exception? innerException = null)
|
||||
: base(message, innerException) { }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace ScadaLink.NotificationService;
|
||||
|
||||
/// <summary>
|
||||
/// Signals a permanent SMTP failure (5xx) that should not be retried.
|
||||
/// </summary>
|
||||
public class SmtpPermanentException : Exception
|
||||
{
|
||||
public SmtpPermanentException(string message, Exception? innerException = null)
|
||||
: base(message, innerException) { }
|
||||
}
|
||||
@@ -34,6 +34,12 @@ public class LdapAuthService
|
||||
{
|
||||
using var connection = new LdapConnection();
|
||||
|
||||
// Bound how long a hung LDAP server can pin a thread-pool thread. The
|
||||
// `ct` passed to Task.Run below only prevents the work item from starting;
|
||||
// it cannot interrupt an in-progress blocking Connect/Bind/Search. This
|
||||
// timeout is the real safeguard (Security-009).
|
||||
ApplyConnectionTimeout(connection);
|
||||
|
||||
// LDAPS: TLS negotiated at connection time. StartTLS: connect plaintext,
|
||||
// then upgrade the session before any credentials are sent.
|
||||
if (_options.LdapTransport == LdapTransport.Ldaps)
|
||||
@@ -127,6 +133,27 @@ public class LdapAuthService
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Applies <see cref="SecurityOptions.LdapConnectionTimeoutMs"/> to both the socket
|
||||
/// connect timeout and the per-operation (bind/search) time limit, so a hung or
|
||||
/// unresponsive LDAP server cannot pin a thread-pool thread indefinitely. The
|
||||
/// <c>CancellationToken</c> handed to the <c>Task.Run</c> wrappers only guards
|
||||
/// work-item scheduling and cannot interrupt an in-progress blocking call.
|
||||
/// </summary>
|
||||
private void ApplyConnectionTimeout(LdapConnection connection)
|
||||
{
|
||||
var timeoutMs = _options.LdapConnectionTimeoutMs;
|
||||
if (timeoutMs <= 0)
|
||||
return;
|
||||
|
||||
connection.ConnectionTimeout = timeoutMs;
|
||||
|
||||
// LdapConstraints.TimeLimit is the server-side operation time limit in ms.
|
||||
var constraints = connection.Constraints;
|
||||
constraints.TimeLimit = timeoutMs;
|
||||
connection.Constraints = constraints;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the user's full DN. When a service account is configured, performs a
|
||||
/// search-then-bind lookup. Otherwise falls back to constructing the DN directly.
|
||||
|
||||
@@ -65,6 +65,16 @@ public class SecurityOptions
|
||||
/// </summary>
|
||||
public string LdapGroupAttribute { get; set; } = "memberOf";
|
||||
|
||||
/// <summary>
|
||||
/// Network timeout, in milliseconds, applied to the LDAP socket connect and to
|
||||
/// LDAP operations (bind/search). The synchronous Novell LDAP calls are wrapped
|
||||
/// in <c>Task.Run</c>, where the <c>CancellationToken</c> only guards work-item
|
||||
/// scheduling — it cannot interrupt an in-progress blocking call. This timeout is
|
||||
/// the real safeguard: it bounds how long a hung LDAP server can pin a thread-pool
|
||||
/// thread (Security-009). Default 10 seconds.
|
||||
/// </summary>
|
||||
public int LdapConnectionTimeoutMs { get; set; } = 10_000;
|
||||
|
||||
/// <summary>
|
||||
/// Symmetric HMAC-SHA256 signing key for cookie-embedded JWTs. Must be at least
|
||||
/// 32 bytes (256 bits) — validated at <see cref="JwtTokenService"/> construction.
|
||||
|
||||
@@ -4,11 +4,11 @@ namespace ScadaLink.Host.Tests;
|
||||
/// Host-003: <c>appsettings.Central.json</c> no longer commits database connection
|
||||
/// strings — they are externalised to environment variables. Tests that exercise the
|
||||
/// full <c>Program</c> startup pipeline against the real SQL provider must therefore
|
||||
/// supply the local dev connection strings the way a deployment would: via
|
||||
/// environment variables (<c>Program</c>'s configuration builder calls
|
||||
/// supply the local dev connection string the way a deployment would: via an
|
||||
/// environment variable (<c>Program</c>'s configuration builder calls
|
||||
/// <c>AddEnvironmentVariables()</c>).
|
||||
///
|
||||
/// Dispose restores the previous values so tests stay isolated.
|
||||
/// Dispose restores the previous value so tests stay isolated.
|
||||
/// </summary>
|
||||
internal sealed class CentralDbTestEnvironment : IDisposable
|
||||
{
|
||||
@@ -16,26 +16,19 @@ internal sealed class CentralDbTestEnvironment : IDisposable
|
||||
// This is a test fixture value, not a committed production secret.
|
||||
private const string ConfigurationDb =
|
||||
"Server=localhost,1433;Database=ScadaLinkConfig;User Id=scadalink_app;Password=ScadaLink_Dev1#;TrustServerCertificate=true";
|
||||
private const string MachineDataDb =
|
||||
"Server=localhost,1433;Database=ScadaLinkMachineData;User Id=scadalink_app;Password=ScadaLink_Dev1#;TrustServerCertificate=true";
|
||||
|
||||
private const string ConfigKey = "ScadaLink__Database__ConfigurationDb";
|
||||
private const string MachineKey = "ScadaLink__Database__MachineDataDb";
|
||||
|
||||
private readonly string? _previousConfig;
|
||||
private readonly string? _previousMachine;
|
||||
|
||||
public CentralDbTestEnvironment()
|
||||
{
|
||||
_previousConfig = Environment.GetEnvironmentVariable(ConfigKey);
|
||||
_previousMachine = Environment.GetEnvironmentVariable(MachineKey);
|
||||
Environment.SetEnvironmentVariable(ConfigKey, ConfigurationDb);
|
||||
Environment.SetEnvironmentVariable(MachineKey, MachineDataDb);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
Environment.SetEnvironmentVariable(ConfigKey, _previousConfig);
|
||||
Environment.SetEnvironmentVariable(MachineKey, _previousMachine);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
using Akka.Configuration;
|
||||
using ScadaLink.ClusterInfrastructure;
|
||||
using ScadaLink.Host.Actors;
|
||||
|
||||
namespace ScadaLink.Host.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Host-006: the Akka HOCON document must be assembled with every interpolated value
|
||||
/// quoted/escaped, so a hostname, seed-node URI or split-brain strategy containing a
|
||||
/// quote, backslash or whitespace cannot corrupt the document or be silently
|
||||
/// misparsed.
|
||||
/// </summary>
|
||||
public class HoconBuilderTests
|
||||
{
|
||||
private static ClusterOptions DefaultCluster() => new()
|
||||
{
|
||||
SeedNodes = new List<string>
|
||||
{
|
||||
"akka.tcp://scadalink@localhost:8081",
|
||||
"akka.tcp://scadalink@localhost:8082",
|
||||
},
|
||||
SplitBrainResolverStrategy = "keep-oldest",
|
||||
MinNrOfMembers = 1,
|
||||
StableAfter = TimeSpan.FromSeconds(15),
|
||||
HeartbeatInterval = TimeSpan.FromSeconds(2),
|
||||
FailureDetectionThreshold = TimeSpan.FromSeconds(10),
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void BuildHocon_PlainValues_ParsesAndPreservesValues()
|
||||
{
|
||||
var node = new NodeOptions
|
||||
{
|
||||
Role = "Central",
|
||||
NodeHostname = "central-node1",
|
||||
RemotingPort = 8081,
|
||||
};
|
||||
|
||||
var hocon = AkkaHostedService.BuildHocon(
|
||||
node, DefaultCluster(), new[] { "Central" }, 5, 15);
|
||||
|
||||
var config = ConfigurationFactory.ParseString(hocon);
|
||||
Assert.Equal("central-node1", config.GetString("akka.remote.dot-netty.tcp.hostname"));
|
||||
Assert.Equal("keep-oldest", config.GetString("akka.cluster.split-brain-resolver.active-strategy"));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void BuildHocon_HostnameWithQuote_DoesNotCorruptDocument()
|
||||
{
|
||||
// A hostname containing a double quote would, with raw interpolation, close
|
||||
// the HOCON string literal early and corrupt every following key.
|
||||
var node = new NodeOptions
|
||||
{
|
||||
Role = "Central",
|
||||
NodeHostname = "evil\"host",
|
||||
RemotingPort = 8081,
|
||||
};
|
||||
|
||||
var hocon = AkkaHostedService.BuildHocon(
|
||||
node, DefaultCluster(), new[] { "Central" }, 5, 15);
|
||||
|
||||
// Must still parse, and the keys after the hostname must remain intact.
|
||||
var config = ConfigurationFactory.ParseString(hocon);
|
||||
Assert.Equal("evil\"host", config.GetString("akka.remote.dot-netty.tcp.hostname"));
|
||||
Assert.Equal(8081, config.GetInt("akka.remote.dot-netty.tcp.port"));
|
||||
Assert.Equal(1, config.GetInt("akka.cluster.min-nr-of-members"));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void BuildHocon_StrategyWithWhitespace_RemainsQuoted()
|
||||
{
|
||||
var cluster = DefaultCluster();
|
||||
cluster.SplitBrainResolverStrategy = "keep oldest";
|
||||
var node = new NodeOptions
|
||||
{
|
||||
Role = "Central",
|
||||
NodeHostname = "node1",
|
||||
RemotingPort = 8081,
|
||||
};
|
||||
|
||||
var hocon = AkkaHostedService.BuildHocon(
|
||||
node, cluster, new[] { "Central" }, 5, 15);
|
||||
|
||||
var config = ConfigurationFactory.ParseString(hocon);
|
||||
Assert.Equal("keep oldest", config.GetString("akka.cluster.split-brain-resolver.active-strategy"));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Serilog.Events;
|
||||
|
||||
namespace ScadaLink.Host.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Host-011: <c>ScadaLink:Logging:MinimumLevel</c> must actually drive the Serilog
|
||||
/// minimum level. Previously the value was bound into <see cref="LoggingOptions"/>
|
||||
/// but never read, so editing it had no effect.
|
||||
/// </summary>
|
||||
public class LoggerConfigurationTests
|
||||
{
|
||||
private static IConfiguration BuildConfig(string? minimumLevel)
|
||||
{
|
||||
var values = new Dictionary<string, string?>();
|
||||
if (minimumLevel != null)
|
||||
values["ScadaLink:Logging:MinimumLevel"] = minimumLevel;
|
||||
return new ConfigurationBuilder().AddInMemoryCollection(values).Build();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void MinimumLevel_Warning_SuppressesInformationLogs()
|
||||
{
|
||||
var sink = new InMemorySink();
|
||||
var logger = LoggerConfigurationFactory
|
||||
.Build(BuildConfig("Warning"), "Central", "central", "node1")
|
||||
.WriteTo.Sink(sink)
|
||||
.CreateLogger();
|
||||
|
||||
logger.Information("info message");
|
||||
logger.Warning("warning message");
|
||||
|
||||
Assert.Single(sink.LogEvents);
|
||||
Assert.Equal(LogEventLevel.Warning, sink.LogEvents[0].Level);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void MinimumLevel_Debug_AllowsDebugLogs()
|
||||
{
|
||||
var sink = new InMemorySink();
|
||||
var logger = LoggerConfigurationFactory
|
||||
.Build(BuildConfig("Debug"), "Site", "site-a", "node1")
|
||||
.WriteTo.Sink(sink)
|
||||
.CreateLogger();
|
||||
|
||||
logger.Debug("debug message");
|
||||
|
||||
Assert.Single(sink.LogEvents);
|
||||
Assert.Equal(LogEventLevel.Debug, sink.LogEvents[0].Level);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void MinimumLevel_Absent_DefaultsToInformation()
|
||||
{
|
||||
var sink = new InMemorySink();
|
||||
var logger = LoggerConfigurationFactory
|
||||
.Build(BuildConfig(null), "Central", "central", "node1")
|
||||
.WriteTo.Sink(sink)
|
||||
.CreateLogger();
|
||||
|
||||
logger.Debug("debug message");
|
||||
logger.Information("info message");
|
||||
|
||||
Assert.Single(sink.LogEvents);
|
||||
Assert.Equal(LogEventLevel.Information, sink.LogEvents[0].Level);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ScadaLink.Host.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Host-010: startup preconditions (database migration) must tolerate a database
|
||||
/// that is briefly unavailable at boot — common when an app container and its DB
|
||||
/// container start together — via a bounded retry with backoff.
|
||||
/// </summary>
|
||||
public class StartupRetryTests
|
||||
{
|
||||
[Fact]
|
||||
public async Task ExecuteWithRetry_SucceedsFirstTry_RunsOnce()
|
||||
{
|
||||
var attempts = 0;
|
||||
await StartupRetry.ExecuteWithRetryAsync(
|
||||
"test-op",
|
||||
() => { attempts++; return Task.CompletedTask; },
|
||||
maxAttempts: 5,
|
||||
initialDelay: TimeSpan.FromMilliseconds(1),
|
||||
NullLogger.Instance);
|
||||
|
||||
Assert.Equal(1, attempts);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ExecuteWithRetry_TransientFailures_RetriesUntilSuccess()
|
||||
{
|
||||
var attempts = 0;
|
||||
await StartupRetry.ExecuteWithRetryAsync(
|
||||
"test-op",
|
||||
() =>
|
||||
{
|
||||
attempts++;
|
||||
if (attempts < 3)
|
||||
throw new InvalidOperationException("db not ready");
|
||||
return Task.CompletedTask;
|
||||
},
|
||||
maxAttempts: 5,
|
||||
initialDelay: TimeSpan.FromMilliseconds(1),
|
||||
NullLogger.Instance);
|
||||
|
||||
Assert.Equal(3, attempts);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ExecuteWithRetry_ExhaustsAttempts_RethrowsLastException()
|
||||
{
|
||||
var attempts = 0;
|
||||
var ex = await Assert.ThrowsAsync<InvalidOperationException>(() =>
|
||||
StartupRetry.ExecuteWithRetryAsync(
|
||||
"test-op",
|
||||
() =>
|
||||
{
|
||||
attempts++;
|
||||
throw new InvalidOperationException($"failure {attempts}");
|
||||
},
|
||||
maxAttempts: 3,
|
||||
initialDelay: TimeSpan.FromMilliseconds(1),
|
||||
NullLogger.Instance));
|
||||
|
||||
Assert.Equal(3, attempts);
|
||||
Assert.Equal("failure 3", ex.Message);
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,6 @@ public class StartupValidatorTests
|
||||
["ScadaLink:Node:NodeHostname"] = "central-node1",
|
||||
["ScadaLink:Node:RemotingPort"] = "8081",
|
||||
["ScadaLink:Database:ConfigurationDb"] = "Server=localhost;Database=Config;",
|
||||
["ScadaLink:Database:MachineDataDb"] = "Server=localhost;Database=MachineData;",
|
||||
["ScadaLink:Security:LdapServer"] = "ldap.example.com",
|
||||
["ScadaLink:Security:JwtSigningKey"] = "test-signing-key-at-least-32-chars-long",
|
||||
["ScadaLink:Cluster:SeedNodes:0"] = "akka.tcp://scadalink@central-node1:8081",
|
||||
@@ -151,14 +150,17 @@ public class StartupValidatorTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Central_MissingMachineDataDb_FailsValidation()
|
||||
public void Central_MissingMachineDataDb_PassesValidation()
|
||||
{
|
||||
// Host-008 regression: MachineDataDb is never consumed anywhere in the
|
||||
// system (only ConfigurationDb is wired into AddConfigurationDatabase).
|
||||
// It is no longer a required key, so its absence must not fail startup.
|
||||
var values = ValidCentralConfig();
|
||||
values.Remove("ScadaLink:Database:MachineDataDb");
|
||||
var config = BuildConfig(values);
|
||||
|
||||
var ex = Assert.Throws<InvalidOperationException>(() => StartupValidator.Validate(config));
|
||||
Assert.Contains("MachineDataDb connection string required for Central", ex.Message);
|
||||
var ex = Record.Exception(() => StartupValidator.Validate(config));
|
||||
Assert.Null(ex);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -310,6 +312,46 @@ public class StartupValidatorTests
|
||||
Assert.Null(ex);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Site_GrpcPortEqualsRemotingPort_FailsValidation()
|
||||
{
|
||||
// Host-007 regression: REQ-HOST-4 requires GrpcPort to differ from
|
||||
// RemotingPort. Identical values cause Kestrel and Akka.Remote to
|
||||
// contend for the same port at runtime.
|
||||
var values = ValidSiteConfig();
|
||||
values["ScadaLink:Node:RemotingPort"] = "8082";
|
||||
values["ScadaLink:Node:GrpcPort"] = "8082";
|
||||
var config = BuildConfig(values);
|
||||
|
||||
var ex = Assert.Throws<InvalidOperationException>(() => StartupValidator.Validate(config));
|
||||
Assert.Contains("GrpcPort must differ from RemotingPort", ex.Message);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Site_DefaultGrpcPortEqualsRemotingPort_FailsValidation()
|
||||
{
|
||||
// GrpcPort absent => NodeOptions default 8083. A site whose RemotingPort
|
||||
// is also 8083 must still be rejected.
|
||||
var values = ValidSiteConfig();
|
||||
values["ScadaLink:Node:RemotingPort"] = "8083";
|
||||
var config = BuildConfig(values);
|
||||
|
||||
var ex = Assert.Throws<InvalidOperationException>(() => StartupValidator.Validate(config));
|
||||
Assert.Contains("GrpcPort must differ from RemotingPort", ex.Message);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Site_GrpcPortDiffersFromRemotingPort_PassesValidation()
|
||||
{
|
||||
var values = ValidSiteConfig();
|
||||
values["ScadaLink:Node:RemotingPort"] = "8082";
|
||||
values["ScadaLink:Node:GrpcPort"] = "8083";
|
||||
var config = BuildConfig(values);
|
||||
|
||||
var ex = Record.Exception(() => StartupValidator.Validate(config));
|
||||
Assert.Null(ex);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void MultipleErrors_AllReported()
|
||||
{
|
||||
|
||||
@@ -56,15 +56,45 @@ public class ApiKeyValidatorTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ValidKey_MethodNotFound_Returns400()
|
||||
public async Task ValidKey_MethodNotFound_IsIndistinguishableFromNotApproved()
|
||||
{
|
||||
// InboundAPI-011: a "method not found" response must not be observably
|
||||
// different from a "key not approved" response, or a caller holding any
|
||||
// valid key could enumerate which method names exist on the central API.
|
||||
var key = new ApiKey("test", "valid-key") { Id = 1, IsEnabled = true };
|
||||
var method = new ApiMethod("realMethod", "return 1;") { Id = 10 };
|
||||
|
||||
_repository.GetAllApiKeysAsync().Returns(new List<ApiKey> { key });
|
||||
_repository.GetMethodByNameAsync("nonExistent").Returns((ApiMethod?)null);
|
||||
_repository.GetMethodByNameAsync("realMethod").Returns(method);
|
||||
_repository.GetApprovedKeysForMethodAsync(10).Returns(new List<ApiKey>());
|
||||
|
||||
var notFound = await _validator.ValidateAsync("valid-key", "nonExistent");
|
||||
var notApproved = await _validator.ValidateAsync("valid-key", "realMethod");
|
||||
|
||||
Assert.False(notFound.IsValid);
|
||||
Assert.False(notApproved.IsValid);
|
||||
// Status code and error message must be identical so existence is not observable.
|
||||
Assert.Equal(notApproved.StatusCode, notFound.StatusCode);
|
||||
Assert.Equal(notApproved.ErrorMessage, notFound.ErrorMessage);
|
||||
Assert.Equal(403, notFound.StatusCode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ValidKey_MethodNotFound_ErrorMessageDoesNotEchoMethodName()
|
||||
{
|
||||
// InboundAPI-011: the error body must not echo the caller-supplied method
|
||||
// name back verbatim (reflected-input) and must not confirm non-existence.
|
||||
var key = new ApiKey("test", "valid-key") { Id = 1, IsEnabled = true };
|
||||
_repository.GetAllApiKeysAsync().Returns(new List<ApiKey> { key });
|
||||
_repository.GetMethodByNameAsync("probe-XYZ").Returns((ApiMethod?)null);
|
||||
|
||||
var result = await _validator.ValidateAsync("valid-key", "probe-XYZ");
|
||||
|
||||
var result = await _validator.ValidateAsync("valid-key", "nonExistent");
|
||||
Assert.False(result.IsValid);
|
||||
Assert.Equal(400, result.StatusCode);
|
||||
Assert.DoesNotContain("probe-XYZ", result.ErrorMessage ?? string.Empty);
|
||||
Assert.DoesNotContain("not found", result.ErrorMessage ?? string.Empty,
|
||||
StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.InboundApi;
|
||||
@@ -323,4 +324,63 @@ public class InboundScriptExecutorTests
|
||||
Assert.False(result.Success);
|
||||
Assert.Contains("timed out", result.ErrorMessage);
|
||||
}
|
||||
|
||||
// --- InboundAPI-009: a script that fails to compile must be compiled at most
|
||||
// once — repeated calls must not re-run the expensive Roslyn compilation. ---
|
||||
|
||||
[Fact]
|
||||
public async Task FailedCompilation_IsNotRetriedOnEveryRequest()
|
||||
{
|
||||
// A broken script compiled once must be remembered as bad: subsequent
|
||||
// ExecuteAsync calls must NOT recompile (CPU amplification vector — there is
|
||||
// no rate limiting on the inbound API). Compilation is observed via the
|
||||
// "compilation failed" log line, which must appear exactly once.
|
||||
var counter = new CompileLogCounter();
|
||||
var executor = new InboundScriptExecutor(
|
||||
new CountingLogger<InboundScriptExecutor>(counter),
|
||||
Substitute.For<IServiceProvider>());
|
||||
|
||||
var method = new ApiMethod("broken", "%%% invalid C# %%%") { Id = 1, TimeoutSeconds = 10 };
|
||||
|
||||
for (var i = 0; i < 5; i++)
|
||||
{
|
||||
var result = await executor.ExecuteAsync(
|
||||
method, new Dictionary<string, object?>(), _route, TimeSpan.FromSeconds(10));
|
||||
Assert.False(result.Success);
|
||||
}
|
||||
|
||||
Assert.Equal(1, counter.CompilationFailures);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void FailedCompilation_RecompilesAfterCompileAndRegisterCalledAgain()
|
||||
{
|
||||
// The failure cache must not be permanent: when the method definition is
|
||||
// updated via CompileAndRegister, a now-valid script must register.
|
||||
var bad = new ApiMethod("fixable", "%%% invalid %%%") { Id = 1, TimeoutSeconds = 10 };
|
||||
Assert.False(_executor.CompileAndRegister(bad));
|
||||
|
||||
var good = new ApiMethod("fixable", "return 1;") { Id = 1, TimeoutSeconds = 10 };
|
||||
Assert.True(_executor.CompileAndRegister(good));
|
||||
}
|
||||
|
||||
private sealed class CompileLogCounter
|
||||
{
|
||||
public int CompilationFailures;
|
||||
}
|
||||
|
||||
private sealed class CountingLogger<T>(CompileLogCounter counter) : ILogger<T>
|
||||
{
|
||||
public IDisposable? BeginScope<TState>(TState state) where TState : notnull => null;
|
||||
public bool IsEnabled(LogLevel logLevel) => true;
|
||||
|
||||
public void Log<TState>(
|
||||
LogLevel logLevel, EventId eventId, TState state, Exception? exception,
|
||||
Func<TState, Exception?, string> formatter)
|
||||
{
|
||||
var message = formatter(state, exception);
|
||||
if (message.Contains("script compilation failed", StringComparison.OrdinalIgnoreCase))
|
||||
Interlocked.Increment(ref counter.CompilationFailures);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,4 +134,40 @@ public class ParameterValidatorTests
|
||||
Assert.False(result.IsValid);
|
||||
Assert.Contains("Unknown parameter type", result.ErrorMessage);
|
||||
}
|
||||
|
||||
// --- InboundAPI-010: unexpected top-level body fields must be reported so
|
||||
// callers get feedback on typo'd parameter names instead of silent ignore. ---
|
||||
|
||||
[Fact]
|
||||
public void UnexpectedBodyField_ReturnsInvalid()
|
||||
{
|
||||
var definitions = JsonSerializer.Serialize(new[]
|
||||
{
|
||||
new { Name = "value", Type = "Integer", Required = true }
|
||||
});
|
||||
|
||||
// "valeu" is a typo for "value"; the caller must be told, not ignored.
|
||||
using var doc = JsonDocument.Parse("{\"value\": 1, \"valeu\": 2}");
|
||||
var result = ParameterValidator.Validate(doc.RootElement.Clone(), definitions);
|
||||
|
||||
Assert.False(result.IsValid);
|
||||
Assert.Contains("valeu", result.ErrorMessage);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void OnlyDefinedFields_StillValid()
|
||||
{
|
||||
// Regression guard: a body containing exactly the defined parameters
|
||||
// must continue to validate.
|
||||
var definitions = JsonSerializer.Serialize(new[]
|
||||
{
|
||||
new { Name = "value", Type = "Integer", Required = true }
|
||||
});
|
||||
|
||||
using var doc = JsonDocument.Parse("{\"value\": 1}");
|
||||
var result = ParameterValidator.Validate(doc.RootElement.Clone(), definitions);
|
||||
|
||||
Assert.True(result.IsValid);
|
||||
Assert.Equal((long)1, result.Parameters["value"]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
using Akka.Actor;
|
||||
using ScadaLink.ManagementService;
|
||||
|
||||
namespace ScadaLink.ManagementService.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Tests for the <see cref="ManagementActor"/> supervision strategy
|
||||
/// (finding ManagementService-005). The project convention is that long-lived
|
||||
/// coordinator-style actors declare an explicit Resume-based strategy.
|
||||
/// </summary>
|
||||
public class ManagementActorSupervisionTests
|
||||
{
|
||||
[Fact]
|
||||
public void CreateSupervisorStrategy_ReturnsOneForOneStrategy()
|
||||
{
|
||||
var strategy = ManagementActor.CreateSupervisorStrategy();
|
||||
|
||||
Assert.IsType<OneForOneStrategy>(strategy);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CreateSupervisorStrategy_ResumesOnArbitraryException()
|
||||
{
|
||||
var strategy = (OneForOneStrategy)ManagementActor.CreateSupervisorStrategy();
|
||||
|
||||
var directive = strategy.Decider.Decide(new InvalidOperationException("boom"));
|
||||
|
||||
Assert.Equal(Directive.Resume, directive);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void CreateSupervisorStrategy_ResumesIndefinitely()
|
||||
{
|
||||
var strategy = (OneForOneStrategy)ManagementActor.CreateSupervisorStrategy();
|
||||
|
||||
// Coordinator actors should not give up: unbounded retries.
|
||||
Assert.Equal(-1, strategy.MaxNumberOfRetries);
|
||||
}
|
||||
}
|
||||
@@ -715,6 +715,33 @@ public class ManagementActorTests : TestKit, IDisposable
|
||||
// continuation rather than escaping or being silently dropped.
|
||||
// ========================================================================
|
||||
|
||||
// ========================================================================
|
||||
// ResolveRolesCommand dead-code removal (finding ManagementService-011 / -008)
|
||||
//
|
||||
// The two-step ResolveRoles + command flow is retired: the HTTP endpoint does
|
||||
// LDAP auth and role resolution itself. The actor must no longer dispatch
|
||||
// ResolveRolesCommand — a stray ClusterClient sender hitting it gets a uniform
|
||||
// ManagementError rather than an unauthenticated role-mapping enumeration.
|
||||
// ========================================================================
|
||||
|
||||
[Fact]
|
||||
public void ResolveRolesCommand_IsNoLongerDispatched_ReturnsManagementError()
|
||||
{
|
||||
var secRepo = Substitute.For<ISecurityRepository>();
|
||||
_services.AddScoped(_ => secRepo);
|
||||
|
||||
var actor = CreateActor();
|
||||
var envelope = Envelope(new ResolveRolesCommand(new[] { "cn=admins" }));
|
||||
|
||||
actor.Tell(envelope);
|
||||
|
||||
var response = ExpectMsg<ManagementError>(TimeSpan.FromSeconds(5));
|
||||
Assert.Equal(envelope.CorrelationId, response.CorrelationId);
|
||||
Assert.Equal("COMMAND_FAILED", response.ErrorCode);
|
||||
// No role-mapping data is enumerated/leaked back to the caller.
|
||||
secRepo.DidNotReceiveWithAnyArgs().GetAllMappingsAsync(default);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void UnknownCommandType_FaultMappedToManagementError()
|
||||
{
|
||||
|
||||
@@ -62,4 +62,41 @@ public class ManagementEndpointsTests
|
||||
Assert.False(result.Success);
|
||||
Assert.Equal("BAD_REQUEST", result.ErrorCode);
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Command-timeout configuration (finding ManagementService-010)
|
||||
//
|
||||
// ManagementServiceOptions.CommandTimeout must actually drive the Ask
|
||||
// timeout instead of a hard-coded 30s constant.
|
||||
// ========================================================================
|
||||
|
||||
[Fact]
|
||||
public void ResolveAskTimeout_UsesConfiguredCommandTimeout()
|
||||
{
|
||||
var options = new ManagementServiceOptions { CommandTimeout = TimeSpan.FromSeconds(75) };
|
||||
|
||||
var timeout = ManagementEndpoints.ResolveAskTimeout(options);
|
||||
|
||||
Assert.Equal(TimeSpan.FromSeconds(75), timeout);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveAskTimeout_WithNullOptions_FallsBackToDefault()
|
||||
{
|
||||
var timeout = ManagementEndpoints.ResolveAskTimeout(null);
|
||||
|
||||
Assert.Equal(TimeSpan.FromSeconds(30), timeout);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveAskTimeout_WithNonPositiveTimeout_FallsBackToDefault()
|
||||
{
|
||||
// A misconfigured zero/negative timeout would make every Ask fail
|
||||
// immediately; fall back to the safe default instead.
|
||||
var options = new ManagementServiceOptions { CommandTimeout = TimeSpan.Zero };
|
||||
|
||||
var timeout = ManagementEndpoints.ResolveAskTimeout(options);
|
||||
|
||||
Assert.Equal(TimeSpan.FromSeconds(30), timeout);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Text.Json;
|
||||
using MailKit;
|
||||
using MailKit.Net.Smtp;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
@@ -407,6 +408,78 @@ public class NotificationDeliveryServiceTests
|
||||
storage, new StoreAndForwardOptions(), NullLogger<StoreAndForwardService>.Instance);
|
||||
}
|
||||
|
||||
// ── NotificationService-010: SMTP client is disconnected on the failure path ──
|
||||
|
||||
/// <summary>
|
||||
/// An SMTP wrapper that records whether <see cref="DisconnectAsync"/> ran and
|
||||
/// can be told to fail at a chosen stage of the delivery sequence.
|
||||
/// </summary>
|
||||
private sealed class DisconnectTrackingClient : ISmtpClientWrapper, IDisposable
|
||||
{
|
||||
private readonly Func<Exception>? _failOnSend;
|
||||
private readonly Func<Exception>? _failOnAuthenticate;
|
||||
|
||||
public DisconnectTrackingClient(
|
||||
Func<Exception>? failOnSend = null, Func<Exception>? failOnAuthenticate = null)
|
||||
{
|
||||
_failOnSend = failOnSend;
|
||||
_failOnAuthenticate = failOnAuthenticate;
|
||||
}
|
||||
|
||||
public bool Disconnected { get; private set; }
|
||||
public bool Disposed { get; private set; }
|
||||
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
=> _failOnAuthenticate != null ? Task.FromException(_failOnAuthenticate()) : Task.CompletedTask;
|
||||
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
=> _failOnSend != null ? Task.FromException(_failOnSend()) : Task.CompletedTask;
|
||||
|
||||
public Task DisconnectAsync(CancellationToken cancellationToken = default)
|
||||
{
|
||||
Disconnected = true;
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public void Dispose() => Disposed = true;
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Send_TransientFailureDuringSend_StillDisconnectsClient()
|
||||
{
|
||||
// NS-010: DisconnectAsync used to run only on the success path inside the
|
||||
// try block. A failure in SendAsync left the authenticated connection open
|
||||
// (the SMTP QUIT was never issued), leaking server connection slots under
|
||||
// sustained transient failures.
|
||||
SetupHappyPath();
|
||||
var tracking = new DisconnectTrackingClient(
|
||||
failOnSend: () => new SmtpProtocolException("protocol error"));
|
||||
var service = new NotificationDeliveryService(
|
||||
_repository, () => tracking, NullLogger<NotificationDeliveryService>.Instance);
|
||||
|
||||
await service.SendAsync("ops-team", "Alert", "Body");
|
||||
|
||||
Assert.True(tracking.Disconnected, "DeliverAsync must disconnect the SMTP client even when the send fails");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Send_FailureDuringAuthenticate_StillDisconnectsClient()
|
||||
{
|
||||
// NS-010: an AuthenticateAsync failure must also tear the connection down.
|
||||
SetupHappyPath();
|
||||
var tracking = new DisconnectTrackingClient(
|
||||
failOnAuthenticate: () => new SmtpProtocolException("auth handshake failed"));
|
||||
var service = new NotificationDeliveryService(
|
||||
_repository, () => tracking, NullLogger<NotificationDeliveryService>.Instance);
|
||||
|
||||
await service.SendAsync("ops-team", "Alert", "Body");
|
||||
|
||||
Assert.True(tracking.Disconnected, "DeliverAsync must disconnect the SMTP client even when authentication fails");
|
||||
}
|
||||
|
||||
// ── NotificationService-005: explicit TLS mode passed through to the wrapper ──
|
||||
|
||||
/// <summary>An SMTP wrapper that records the TLS mode and timeout it was connected with.</summary>
|
||||
@@ -642,4 +715,77 @@ public class NotificationDeliveryServiceTests
|
||||
Assert.False(result.Success);
|
||||
Assert.Contains("address", result.ErrorMessage, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
// ── NotificationService-012: OAuth2 delivery path coverage ──
|
||||
|
||||
/// <summary>An SMTP wrapper that records the auth type and credentials it received.</summary>
|
||||
private sealed class RecordingAuthClient : ISmtpClientWrapper
|
||||
{
|
||||
public string? AuthType { get; private set; }
|
||||
public string? Credentials { get; private set; }
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
{
|
||||
AuthType = authType;
|
||||
Credentials = credentials;
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task DisconnectAsync(CancellationToken cancellationToken = default) => Task.CompletedTask;
|
||||
}
|
||||
|
||||
private static OAuth2TokenService CreateTokenService(string accessToken, int expiresIn = 3600)
|
||||
{
|
||||
var json = JsonSerializer.Serialize(new
|
||||
{
|
||||
access_token = accessToken,
|
||||
expires_in = expiresIn,
|
||||
token_type = "Bearer"
|
||||
});
|
||||
var factory = Substitute.For<IHttpClientFactory>();
|
||||
factory.CreateClient(Arg.Any<string>())
|
||||
.Returns(_ => new HttpClient(new StubHttpHandler(json)));
|
||||
return new OAuth2TokenService(factory, NullLogger<OAuth2TokenService>.Instance);
|
||||
}
|
||||
|
||||
private sealed class StubHttpHandler : HttpMessageHandler
|
||||
{
|
||||
private readonly string _json;
|
||||
public StubHttpHandler(string json) => _json = json;
|
||||
protected override Task<HttpResponseMessage> SendAsync(
|
||||
HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
=> Task.FromResult(new HttpResponseMessage(System.Net.HttpStatusCode.OK)
|
||||
{
|
||||
Content = new StringContent(_json)
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Send_OAuth2Config_AuthenticatesWithResolvedAccessToken()
|
||||
{
|
||||
// NS-012: the OAuth2 delivery branch in DeliverAsync (token resolution during
|
||||
// a send) was never exercised — every other test uses Basic Auth and a null
|
||||
// token service. The credentials reaching the SMTP client must be the access
|
||||
// token from OAuth2TokenService, not the raw tenant:client:secret triple.
|
||||
var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com")
|
||||
{
|
||||
Id = 1, Port = 587, Credentials = "tenant1:client1:secret1", TlsMode = "starttls"
|
||||
};
|
||||
SetupHappyPathWithSmtp(cfg);
|
||||
|
||||
var recording = new RecordingAuthClient();
|
||||
var service = new NotificationDeliveryService(
|
||||
_repository,
|
||||
() => recording,
|
||||
NullLogger<NotificationDeliveryService>.Instance,
|
||||
tokenService: CreateTokenService("oauth2-access-token-xyz"));
|
||||
|
||||
var result = await service.SendAsync("ops-team", "Alert", "Body");
|
||||
|
||||
Assert.True(result.Success);
|
||||
Assert.Equal("oauth2", recording.AuthType);
|
||||
Assert.Equal("oauth2-access-token-xyz", recording.Credentials);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,6 +124,105 @@ public class OAuth2TokenServiceTests
|
||||
Assert.Equal(2, handler.CallCount); // one per distinct credential, not per call
|
||||
}
|
||||
|
||||
// ── NotificationService-012: token expiry/refresh and concurrent acquisition ──
|
||||
|
||||
[Fact]
|
||||
public async Task GetTokenAsync_ExpiredToken_RefreshesOnNextCall()
|
||||
{
|
||||
// NS-012: token expiry/refresh was untested — the cache test used a 3600s
|
||||
// token so the refresh branch never ran. The service refreshes 60s before
|
||||
// the stated expiry, so an expires_in of 60 makes the token immediately
|
||||
// stale and the next call must fetch a fresh one.
|
||||
var handler = new SequenceHttpMessageHandler(
|
||||
TokenJson("first-token", expiresIn: 60),
|
||||
TokenJson("second-token", expiresIn: 3600));
|
||||
var client = new HttpClient(handler);
|
||||
var factory = CreateMockFactory(client);
|
||||
var service = new OAuth2TokenService(factory, NullLogger<OAuth2TokenService>.Instance);
|
||||
|
||||
var token1 = await service.GetTokenAsync("tenant:client:secret");
|
||||
var token2 = await service.GetTokenAsync("tenant:client:secret");
|
||||
|
||||
Assert.Equal("first-token", token1);
|
||||
Assert.Equal("second-token", token2); // refreshed because the first was already stale
|
||||
Assert.Equal(2, handler.CallCount);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetTokenAsync_ConcurrentCalls_MakeExactlyOneHttpRequest()
|
||||
{
|
||||
// NS-012: the double-checked-locking path was never exercised. Many callers
|
||||
// racing for the same uncached credential must collapse to a single token
|
||||
// fetch, not one HTTP call per caller.
|
||||
var handler = new SlowCountingHttpMessageHandler(
|
||||
TokenJson("concurrent-token", expiresIn: 3600), delay: TimeSpan.FromMilliseconds(100));
|
||||
var client = new HttpClient(handler);
|
||||
var factory = CreateMockFactory(client);
|
||||
var service = new OAuth2TokenService(factory, NullLogger<OAuth2TokenService>.Instance);
|
||||
|
||||
var tasks = Enumerable.Range(0, 20)
|
||||
.Select(_ => service.GetTokenAsync("tenant:client:secret"))
|
||||
.ToArray();
|
||||
var tokens = await Task.WhenAll(tasks);
|
||||
|
||||
Assert.All(tokens, t => Assert.Equal("concurrent-token", t));
|
||||
Assert.Equal(1, handler.CallCount);
|
||||
}
|
||||
|
||||
private static string TokenJson(string accessToken, int expiresIn) =>
|
||||
JsonSerializer.Serialize(new
|
||||
{
|
||||
access_token = accessToken,
|
||||
expires_in = expiresIn,
|
||||
token_type = "Bearer"
|
||||
});
|
||||
|
||||
/// <summary>HTTP handler returning a different response per invocation, in order.</summary>
|
||||
private class SequenceHttpMessageHandler : HttpMessageHandler
|
||||
{
|
||||
private readonly string[] _responses;
|
||||
public int CallCount { get; private set; }
|
||||
|
||||
public SequenceHttpMessageHandler(params string[] responses) => _responses = responses;
|
||||
|
||||
protected override Task<HttpResponseMessage> SendAsync(
|
||||
HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
{
|
||||
var body = _responses[Math.Min(CallCount, _responses.Length - 1)];
|
||||
CallCount++;
|
||||
return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)
|
||||
{
|
||||
Content = new StringContent(body)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>HTTP handler that delays and counts invocations (thread-safe count).</summary>
|
||||
private class SlowCountingHttpMessageHandler : HttpMessageHandler
|
||||
{
|
||||
private readonly string _response;
|
||||
private readonly TimeSpan _delay;
|
||||
private int _callCount;
|
||||
public int CallCount => _callCount;
|
||||
|
||||
public SlowCountingHttpMessageHandler(string response, TimeSpan delay)
|
||||
{
|
||||
_response = response;
|
||||
_delay = delay;
|
||||
}
|
||||
|
||||
protected override async Task<HttpResponseMessage> SendAsync(
|
||||
HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
{
|
||||
Interlocked.Increment(ref _callCount);
|
||||
await Task.Delay(_delay, cancellationToken);
|
||||
return new HttpResponseMessage(HttpStatusCode.OK)
|
||||
{
|
||||
Content = new StringContent(_response)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// HTTP handler that returns a distinct access token per tenant id, parsed from
|
||||
/// the request URL (<c>https://login.microsoftonline.com/{tenantId}/...</c>).
|
||||
|
||||
+75
@@ -672,6 +672,81 @@ public class SecurityReviewRegressionTests2
|
||||
|
||||
#endregion
|
||||
|
||||
#region Code Review Regression Tests — Security-009/011
|
||||
|
||||
/// <summary>
|
||||
/// Regression tests for Security-009 (no LDAP connection timeout — a hung server can
|
||||
/// pin a thread-pool thread indefinitely because <c>ct</c> only guards work-item
|
||||
/// scheduling) and the remaining Security-011 coverage gaps.
|
||||
/// </summary>
|
||||
public class SecurityReviewRegressionTests3
|
||||
{
|
||||
// --- Security-009: LDAP connection timeout must be configurable and bounded ---
|
||||
|
||||
[Fact]
|
||||
public void SecurityOptions_LdapConnectionTimeout_HasSaneDefault()
|
||||
{
|
||||
var options = new SecurityOptions();
|
||||
// A positive, finite default so a hung LDAP server cannot pin a thread forever.
|
||||
Assert.True(options.LdapConnectionTimeoutMs > 0);
|
||||
Assert.True(options.LdapConnectionTimeoutMs <= 60_000,
|
||||
"Default LDAP connection timeout should be bounded (<= 60s).");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AuthenticateAsync_UnreachableHost_FailsWithinConfiguredTimeout()
|
||||
{
|
||||
// A routable-but-non-responsive address would otherwise hang for the OS default
|
||||
// (often minutes). With LdapConnectionTimeoutMs applied to the LdapConnection the
|
||||
// call must give up promptly. 198.51.100.0/24 (TEST-NET-2, RFC 5737) is reserved
|
||||
// and not routed, so the connect attempt stalls until the timeout fires.
|
||||
var options = new SecurityOptions
|
||||
{
|
||||
LdapServer = "198.51.100.1",
|
||||
LdapPort = 389,
|
||||
LdapTransport = LdapTransport.None,
|
||||
AllowInsecureLdap = true,
|
||||
LdapSearchBase = "dc=example,dc=com",
|
||||
LdapConnectionTimeoutMs = 2_000
|
||||
};
|
||||
var service = new LdapAuthService(Options.Create(options), NullLogger<LdapAuthService>.Instance);
|
||||
|
||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||
var result = await service.AuthenticateAsync("user", "password");
|
||||
sw.Stop();
|
||||
|
||||
Assert.False(result.Success);
|
||||
// Generous ceiling: the 2s timeout plus scheduling/CI overhead, far below the
|
||||
// multi-minute OS default that an unconfigured connection would incur.
|
||||
Assert.True(sw.Elapsed < TimeSpan.FromSeconds(30),
|
||||
$"Authentication did not honour the LDAP connection timeout: took {sw.Elapsed}.");
|
||||
}
|
||||
|
||||
// --- Security-011: additional coverage for the no-service-account / DN paths ---
|
||||
|
||||
[Fact]
|
||||
public void BuildFallbackUserDn_NoSearchBase_ReturnsBareRdn()
|
||||
{
|
||||
var dn = LdapAuthService.BuildFallbackUserDn("alice", "", "uid");
|
||||
Assert.Equal("uid=alice", dn);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void EscapeLdapDn_LeadingHash_IsEscaped()
|
||||
{
|
||||
Assert.Equal(@"\#admin", LdapAuthService.EscapeLdapDn("#admin"));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void EscapeLdapDn_NullOrEmpty_ReturnedUnchanged()
|
||||
{
|
||||
Assert.Equal("", LdapAuthService.EscapeLdapDn(""));
|
||||
Assert.Null(LdapAuthService.EscapeLdapDn(null!));
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region WP-9: Authorization Policy Tests
|
||||
|
||||
public class AuthorizationPolicyTests
|
||||
Reference in New Issue
Block a user