Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3f19371017 | |||
| 2d7ac5b57f | |||
| e57ccd78b7 | |||
| e9ee4e3ea5 | |||
| ff4a4bdeb7 | |||
| 7d1cc5cbb4 |
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 6 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -261,7 +261,7 @@ follow-up. The code fix in this module is complete.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Configurations/AuditConfiguration.cs:11` (entity `src/ScadaLink.Commons/Entities/Audit/AuditLogEntry.cs`) |
|
||||
|
||||
**Description**
|
||||
@@ -282,7 +282,20 @@ Resolve the discrepancy in one direction.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against source: the
|
||||
`AuditLogEntry` entity declares `int Id`, while the design doc's Audit Entry Schema
|
||||
table said `Long / GUID`. The entity lives in `ScadaLink.Commons`
|
||||
(`src/ScadaLink.Commons/Entities/Audit/AuditLogEntry.cs`), which is outside this
|
||||
module's editable scope, so the discrepancy was resolved by aligning the design doc to
|
||||
the code — the recommendation's second option. The schema table now records `Id` as
|
||||
`int (identity)` with an explicit justification: a 32-bit identity matches the key type
|
||||
of every other entity in the schema (uniform repository/query code), and at a sustained
|
||||
100 rows/second the `int` range is not exhausted for roughly 680 years, so the
|
||||
indefinite-retention policy poses no realistic overflow risk; if a future deployment
|
||||
ever approaches the limit the column can be widened to `bigint` via a migration without
|
||||
a schema redesign. No regression test is meaningful for a documentation alignment; the
|
||||
existing `AuditConfiguration` (`HasKey(a => a.Id)`) and the audit repository tests
|
||||
already exercise the `int` key end to end.
|
||||
|
||||
### ConfigurationDatabase-006 — `Site.GrpcNodeAAddress` / `GrpcNodeBAddress` columns are unbounded
|
||||
|
||||
@@ -290,7 +303,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Configurations/SiteConfiguration.cs:24-25` |
|
||||
|
||||
**Description**
|
||||
@@ -310,7 +323,19 @@ generate a migration to alter the column types.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against source:
|
||||
`SiteConfiguration` configured `NodeAAddress`/`NodeBAddress` with `HasMaxLength(500)` but
|
||||
left `GrpcNodeAAddress`/`GrpcNodeBAddress` unconfigured, so EF mapped them to
|
||||
`nvarchar(max)` — inconsistent with the sibling columns and non-indexable. Applied the
|
||||
recommendation: added `builder.Property(s => s.GrpcNodeAAddress).HasMaxLength(500)` and
|
||||
the same for `GrpcNodeBAddress`. Generated migration
|
||||
`20260517020720_BoundGrpcNodeAddressLength` altering both columns from `nvarchar(max)`
|
||||
to `nvarchar(500)` (the model snapshot was updated to match). Regression tests added in
|
||||
`SchemaConfigurationTests.cs`:
|
||||
`GrpcNodeAddressColumns_AreLengthBoundedTo500` (theory over both columns, asserting the
|
||||
EF model metadata reports `MaxLength == 500`) and
|
||||
`GrpcNodeAddressColumns_MatchSiblingNodeAddressBounds` (asserting the gRPC columns share
|
||||
the bound of the `NodeAAddress`/`NodeBAddress` siblings).
|
||||
|
||||
### ConfigurationDatabase-007 — `AuditService` does not handle JSON-serialization failure of arbitrary `afterState`
|
||||
|
||||
@@ -367,7 +392,7 @@ added in `AuditServiceTests.cs`:
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Repositories/InboundApiRepository.cs:46-58` |
|
||||
|
||||
**Description**
|
||||
@@ -390,7 +415,25 @@ gives referential integrity and correct cascade behaviour when an API key is del
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against source:
|
||||
`GetApprovedKeysForMethodAsync` mapped each CSV token with
|
||||
`int.TryParse(...) ? id : -1` then filtered `id > 0`, so any unparseable (or
|
||||
non-positive) token was discarded with no signal — a corrupt `ApprovedApiKeyIds` value
|
||||
silently approves fewer keys than intended, an authorization-relevant outcome.
|
||||
|
||||
Applied the recommendation's short-term fix: the parse loop was rewritten to log a
|
||||
warning for every token that fails to parse to a positive integer, naming the method id
|
||||
and the offending token, so corruption is observable in logs. Valid ids still resolve
|
||||
normally. `InboundApiRepository` gained an optional `ILogger<InboundApiRepository>`
|
||||
constructor parameter (defaulting to `NullLogger`, matching the `MigrationHelper`
|
||||
pattern) and the project now references `Microsoft.Extensions.Logging.Abstractions`. The
|
||||
longer-term join-table redesign would change the `ApiMethod` entity / schema and the
|
||||
`IInboundApiRepository` contract (Commons, out of this module's scope) and is left as a
|
||||
future schema-design item. Regression tests added in `InboundApiRepositoryTests.cs`:
|
||||
`GetApprovedKeysForMethod_WithMalformedCsvToken_LogsWarningAndDropsToken`,
|
||||
`GetApprovedKeysForMethod_WithValidCsv_ReturnsAllKeys`, and
|
||||
`GetApprovedKeysForMethod_WithNullOrEmptyCsv_ReturnsEmptyWithoutWarning` (using a
|
||||
capturing `ILogger` to assert the warning is emitted only on malformed input).
|
||||
|
||||
### ConfigurationDatabase-009 — Multi-collection eager loads issue cartesian-product queries
|
||||
|
||||
@@ -398,7 +441,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Repositories/TemplateEngineRepository.cs:43-51,53-61`, `src/ScadaLink.ConfigurationDatabase/Repositories/CentralUiRepository.cs:45-55` |
|
||||
|
||||
**Description**
|
||||
@@ -421,7 +464,24 @@ cartesian explosion is avoided.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against source:
|
||||
`GetAllTemplatesAsync` and `GetTemplatesComposingAsync` (`TemplateEngineRepository`) and
|
||||
`GetTemplateTreeAsync` (`CentralUiRepository`) each `Include` three-to-four sibling
|
||||
collections in a single query, producing a cartesian-product join. The same shape was
|
||||
also present in `GetTemplateByIdAsync`, `GetInstanceByIdAsync`, `GetAllInstancesAsync`,
|
||||
`GetInstancesBySiteIdAsync`, and `GetInstanceByUniqueNameAsync`.
|
||||
|
||||
Applied the recommendation's per-query option: `.AsSplitQuery()` was added to every
|
||||
multi-collection-include query in `TemplateEngineRepository` (eight call sites) and to
|
||||
`GetTemplateTreeAsync` in `CentralUiRepository`, so each collection loads with its own
|
||||
query and the cartesian explosion is avoided. Per-query `AsSplitQuery()` was preferred
|
||||
over a global `UseQuerySplittingBehavior` so single-collection queries elsewhere keep
|
||||
the cheaper single-query plan. Split queries change query *shape* only, not results;
|
||||
regression tests added in `SchemaConfigurationTests.cs` pin that behaviour:
|
||||
`GetAllTemplatesAsync_WithMultipleMembersPerCollection_LoadsAllWithoutDuplication`
|
||||
(a template with 3 attributes, 2 alarms, 4 scripts must return exactly those counts —
|
||||
not a 24-row cartesian product) and
|
||||
`GetTemplateByIdAsync_WithMultipleMembers_LoadsAllCollections`.
|
||||
|
||||
### ConfigurationDatabase-010 — Several repositories and `InstanceLocator` lack direct test coverage
|
||||
|
||||
@@ -429,7 +489,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Repositories/TemplateEngineRepository.cs`, `Repositories/DeploymentManagerRepository.cs`, `Repositories/ExternalSystemRepository.cs`, `Repositories/InboundApiRepository.cs`, `Repositories/NotificationRepository.cs`, `Repositories/SiteRepository.cs`, `Services/InstanceLocator.cs` |
|
||||
|
||||
**Description**
|
||||
@@ -453,7 +513,24 @@ and `InstanceLocator.GetSiteIdForInstanceAsync` for found/not-found cases.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Direct repository/service tests were added using
|
||||
the existing `SqliteTestHelper` pattern. `InboundApiRepositoryTests.cs` covers
|
||||
`InboundApiRepository` (API-key/method CRUD round-trips and the
|
||||
`GetApprovedKeysForMethodAsync` valid/malformed/empty-CSV cases — see CD-008).
|
||||
`RepositoryCoverageTests.cs` adds `ExternalSystemRepositoryTests` (definition/method CRUD,
|
||||
parent-filtered method query, database-connection delete), `NotificationRepositoryTests`
|
||||
(notification-list-with-recipients and SMTP-configuration round-trips, list delete),
|
||||
`SiteRepositoryTests` (site/identifier round-trip plus the stub-attach delete fallback
|
||||
exercised for both `DeleteSiteAsync` and `DeleteDataConnectionAsync` by clearing the
|
||||
ChangeTracker, and the site-filtered instance query), `DeploymentManagerRepositoryTests`
|
||||
(deployment-record CRUD and `GetCurrentDeploymentStatusAsync` ordering, the stub-attach
|
||||
`DeleteDeploymentRecordAsync` fallback, and `DeleteInstanceAsync`'s explicit
|
||||
Restrict-FK deployment-record cleanup), and `InstanceLocatorTests`
|
||||
(`GetSiteIdForInstanceAsync` for the found and not-found cases). `TemplateEngineRepository`
|
||||
gained the CD-001 and CD-009 regression tests
|
||||
(`TemplateEngineRepositoryTests.cs`, `SchemaConfigurationTests.cs`). A constructor
|
||||
null-guard test was added for each of the five repositories/services covered, doubling
|
||||
as the CD-011 regression guard. The full module suite is green.
|
||||
|
||||
### ConfigurationDatabase-011 — Inconsistent constructor null-guarding across repositories/services
|
||||
|
||||
@@ -461,7 +538,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ConfigurationDatabase/Repositories/ExternalSystemRepository.cs:11-14`, `Repositories/InboundApiRepository.cs:11-14`, `Repositories/NotificationRepository.cs:11-14`, `Services/InstanceLocator.cs:13-16` |
|
||||
|
||||
**Description**
|
||||
@@ -482,4 +559,14 @@ inconsistent constructors so all data-access types behave uniformly.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Root cause confirmed against source:
|
||||
`ExternalSystemRepository`, `InboundApiRepository`, `NotificationRepository`, and
|
||||
`InstanceLocator` assigned the injected `ScadaLinkDbContext` directly with no null
|
||||
guard, diverging from `SecurityRepository`/`CentralUiRepository`/`TemplateEngineRepository`/
|
||||
`DeploymentManagerRepository`/`SiteRepository`/`AuditService`. Applied the recommendation:
|
||||
all four constructors now use `context ?? throw new ArgumentNullException(nameof(context))`
|
||||
(`InboundApiRepository`'s guard was added as part of its CD-008 constructor change), so
|
||||
every data-access type behaves uniformly and a hand-constructed instance fails with an
|
||||
informative exception at construction rather than a later `NullReferenceException`.
|
||||
Regression: `Constructor_NullContext_Throws` tests were added for all four affected types
|
||||
(`InboundApiRepositoryTests.cs`, `RepositoryCoverageTests.cs`).
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 2 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -381,7 +381,7 @@ after.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DataConnectionLayer/Actors/DataConnectionActor.cs:540-569` |
|
||||
|
||||
**Description**
|
||||
@@ -404,7 +404,26 @@ prior state captured before removal.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). A `tagPath → subscriber-count` reverse index
|
||||
(`_tagSubscriberCount`) was added: `HandleSubscribeCompleted` increments it whenever a
|
||||
tag is newly added to an instance's set, and `HandleUnsubscribe` decrements it,
|
||||
releasing a tag at the adapter only when the count reaches zero. The "any other
|
||||
subscriber" check is now O(1) per tag instead of an O(instances) `Where(...).Any()`
|
||||
scan. The redundant `!_unresolvedTags.Contains(tagPath)` re-check (always true after
|
||||
the unconditional `Remove` on the line above) was removed — the surviving branch is
|
||||
entered only for tags that have a subscription id, which are by definition resolved,
|
||||
so `_resolvedTags--` is now unconditional with an explanatory comment. The cleanup
|
||||
also fixed a latent leak the original code could not reach: an unresolved tag whose
|
||||
last subscriber unsubscribes is now removed from `_unresolvedTags`/`_resolutionInFlight`
|
||||
and decremented from `_totalSubscribed` (previously it lingered in the retry timer and
|
||||
the subscribed total forever). Regression test
|
||||
`DCL008_Unsubscribe_OnlyReleasesTagWhenLastSubscriberLeaves` subscribes a tag from two
|
||||
instances plus an exclusive tag, then unsubscribes each instance and asserts the
|
||||
shared tag is released at the adapter only after the last subscriber leaves and the
|
||||
health counters track correctly. (This finding is a performance refactor, not a
|
||||
correctness bug — the pre-fix `Where(...).Any()` logic was functionally correct, so
|
||||
the test passes against both versions and serves as a behavioural guard for the
|
||||
refactor.)
|
||||
|
||||
### DataConnectionLayer-009 — Implemented failover heuristic diverges from the documented state machine
|
||||
|
||||
@@ -606,7 +625,7 @@ deliberately not made here because this task is scoped to
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DataConnectionLayer/Adapters/OpcUaDataConnection.cs:270-281` |
|
||||
|
||||
**Description**
|
||||
@@ -629,4 +648,16 @@ under a race and is tolerated downstream."
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Rather than weaken the XML comment to match the
|
||||
weak guard, the guard was made genuinely atomic so the documented "only the first
|
||||
caller fires the event" guarantee becomes true. `OpcUaDataConnection._disconnectFired`
|
||||
and `RealOpcUaClient._connectionLostFired` were changed from `volatile bool` to `int`,
|
||||
and the check-then-set in `RaiseDisconnected` / `OnSessionKeepAlive` replaced with a
|
||||
single `Interlocked.Exchange(ref flag, 1) != 0` compare-and-set; the reset on connect
|
||||
uses `Interlocked.Exchange(ref flag, 0)`. The XML comments on both methods were updated
|
||||
to describe the atomic compare-and-set explicitly. Regression test
|
||||
`DCL013_ConcurrentConnectionLost_RaisesDisconnectedExactlyOnce` runs 25 rounds, each
|
||||
fanning 32 barrier-synchronised threads that raise the client's `ConnectionLost` event
|
||||
simultaneously, and asserts `Disconnected` fires exactly once per round; against a
|
||||
non-atomic check-then-set it double-fires (verified by temporarily reverting the
|
||||
guard), and it passes against the atomic fix.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 1 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -423,7 +423,7 @@ configuration binding. Regression tests:
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:288` |
|
||||
|
||||
**Description**
|
||||
@@ -436,6 +436,9 @@ into the comment and not derived from any constant in this module. If
|
||||
`LifecycleTimeout` is reconfigured, the comment becomes wrong. It also wrongly
|
||||
implies the value lives in this module.
|
||||
|
||||
**Verification:** Confirmed against source. The `DeleteInstanceAsync` XML doc
|
||||
quoted a hard-coded "30s" value.
|
||||
|
||||
**Recommendation**
|
||||
|
||||
Reword to "Delete fails if the site is unreachable within
|
||||
@@ -443,7 +446,12 @@ Reword to "Delete fails if the site is unreachable within
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): the `DeleteInstanceAsync` XML doc no
|
||||
longer quotes a hard-coded "30s" — it now states delete fails if the site is
|
||||
unreachable within `CommunicationOptions.LifecycleTimeout` (and notes the
|
||||
deadline is applied inside `CommunicationService.DeleteInstanceAsync`).
|
||||
Documentation-only change; no regression test (a test asserting comment text
|
||||
would be meaningless).
|
||||
|
||||
### DeploymentManager-010 — `SystemArtifactDeploymentRecord` does not persist the deployment ID
|
||||
|
||||
@@ -451,7 +459,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/ArtifactDeploymentService.cs:136,194-211` |
|
||||
|
||||
**Description**
|
||||
@@ -465,6 +473,9 @@ stored record. Additionally each per-site `DeployArtifactsCommand` carries its
|
||||
own separate GUID (`BuildDeployArtifactsCommandAsync` line 114), so there are in
|
||||
fact N+1 unrelated IDs for one logical artifact deployment.
|
||||
|
||||
**Verification:** Confirmed against source. Each per-site command minted its own
|
||||
GUID and the persisted record had no way to reference the logical id.
|
||||
|
||||
**Recommendation**
|
||||
|
||||
Add a `DeploymentId` column to `SystemArtifactDeploymentRecord` and store the
|
||||
@@ -473,7 +484,23 @@ per-site commands so the audit log, UI summary, and persisted record agree.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): `BuildDeployArtifactsCommandAsync` now
|
||||
accepts an optional `deploymentId`, and `DeployToAllSitesAsync` passes the one
|
||||
logical `deploymentId` to every per-site command — so the per-site commands,
|
||||
the audit log, and the UI summary all reference a single id instead of N+1
|
||||
unrelated GUIDs (`RetryForSiteAsync`, an independent single-site retry, still
|
||||
mints its own id). Adding a dedicated `DeploymentId` *column* to
|
||||
`SystemArtifactDeploymentRecord` was deliberately **not** done: that entity
|
||||
lives in `ScadaLink.Commons` with its EF mapping in
|
||||
`ScadaLink.ConfigurationDatabase`, both outside this module's edit scope.
|
||||
Instead the logical `deploymentId` is embedded in the record's free-form
|
||||
`PerSiteStatus` JSON payload (`{ DeploymentId, Sites }`), which is fully within
|
||||
this module's control, so the persisted record is correlatable with the
|
||||
summary/audit. A follow-up to promote it to a first-class column should be
|
||||
filed against Commons/ConfigurationDatabase if a queryable index is needed.
|
||||
Regression tests: `DeployToAllSitesAsync_AllPerSiteCommandsShareTheSummaryDeploymentId`,
|
||||
`DeployToAllSitesAsync_PartialFailure_ReportsPerSiteMatrix`,
|
||||
`RetryForSiteAsync_SiteSucceeds_ReturnsSuccessAndAudits`.
|
||||
|
||||
### DeploymentManager-011 — Tests never exercise a successful deployment or lifecycle success path
|
||||
|
||||
@@ -536,7 +563,7 @@ which asserts on `IAuditService.LogAsync`. Regression tests:
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.DeploymentManager/DeploymentManagerOptions.cs:8-9` |
|
||||
|
||||
**Description**
|
||||
@@ -547,6 +574,9 @@ default and an XML doc, but it is never read anywhere in the codebase
|
||||
`CommunicationService`). The option misleads readers into thinking it controls
|
||||
disable/enable/delete timeouts, when setting it has no effect.
|
||||
|
||||
**Verification:** Confirmed against source. A repo-wide grep found exactly one
|
||||
occurrence of `LifecycleCommandTimeout` — the declaration itself.
|
||||
|
||||
**Recommendation**
|
||||
|
||||
Remove `LifecycleCommandTimeout`, or actually thread it through to the
|
||||
@@ -556,7 +586,21 @@ lifecycle command calls (e.g. by creating a linked CTS with this timeout in
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): `LifecycleCommandTimeout` is now actually
|
||||
threaded through (the option exists for tuning, so it was wired up rather than
|
||||
deleted). `DisableInstanceAsync`/`EnableInstanceAsync`/`DeleteInstanceAsync`
|
||||
each create a linked `CancellationTokenSource` with `CancelAfter(
|
||||
_options.LifecycleCommandTimeout)` — the same pattern `ArtifactDeploymentService`
|
||||
uses for `ArtifactDeploymentTimeoutPerSite` — and pass its token to the
|
||||
`CommunicationService` call. Each method now catches the resulting
|
||||
`TimeoutException`/`OperationCanceledException`, logs a warning, and returns a
|
||||
`Result.Failure` (previously an `AskTimeoutException` from a hung site escaped
|
||||
uncaught). The option's XML doc was corrected to describe the real behaviour.
|
||||
Regression test:
|
||||
`DisableInstanceAsync_SiteUnresponsive_LifecycleCommandTimeoutBoundsTheWait`
|
||||
(asserts a 300 ms `LifecycleCommandTimeout` bounds the wait far below the 30 s
|
||||
`CommunicationOptions.LifecycleTimeout`; confirmed to fail before the fix —
|
||||
the call hung the full 30 s and threw `AskTimeoutException`).
|
||||
|
||||
### DeploymentManager-013 — SMTP credentials serialized and broadcast to all sites
|
||||
|
||||
@@ -585,9 +629,35 @@ Confirm inter-cluster transport encryption covers artifact commands, ensure
|
||||
SMTP credentials on site SQLite. Consider encrypting the credential field
|
||||
within the artifact payload.
|
||||
|
||||
**Verification (2026-05-16):** Re-triaged against source. The DeploymentManager
|
||||
side is **clean**: `ArtifactDeploymentService` maps `SmtpConfiguration.Credentials`
|
||||
into the artifact (which the design explicitly mandates — SMTP configuration is
|
||||
a deployable artifact) and **never logs it** — the three log statements in
|
||||
`DeployToAllSitesAsync` only reference `SiteId`, `SiteName`, `DeploymentId`, and
|
||||
`ex.Message`, never the credential. There is no defect to fix purely within
|
||||
`src/ScadaLink.DeploymentManager`. The finding's remaining recommendations are
|
||||
all cross-module and one needs a design decision:
|
||||
- inter-cluster transport TLS — `ScadaLink.Communication` /
|
||||
`ScadaLink.ClusterInfrastructure` (Akka remoting + ClusterClient config);
|
||||
- at-rest encryption of the credential on site SQLite — `ScadaLink.SiteRuntime`
|
||||
artifact store;
|
||||
- encrypting the credential field inside the artifact payload — needs the
|
||||
`SmtpConfigurationArtifact` shape in `ScadaLink.Commons` plus cooperating
|
||||
producer (DeploymentManager) and consumer (SiteRuntime) changes, and a
|
||||
**key-management design decision** (where the encryption key lives, how it
|
||||
is distributed to sites) that cannot be made unilaterally here.
|
||||
|
||||
**Status: Open — flagged.** No purely-DeploymentManager fix exists; the work
|
||||
crosses Communication / SiteRuntime / Commons and requires a key-management
|
||||
design decision. Severity confirmed Low: with TLS-protected inter-cluster
|
||||
transport (a separate, assumed-in-place control) and no logging leak, this is a
|
||||
hardening item, not an active leak.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
_Unresolved — see Verification above. Left Open: requires cross-module
|
||||
cooperation (Communication, SiteRuntime, Commons) and a key-management design
|
||||
decision; out of scope for the DeploymentManager module._
|
||||
|
||||
### DeploymentManager-014 — Dead `CreateCommand` helper in artifact tests
|
||||
|
||||
@@ -595,7 +665,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `tests/ScadaLink.DeploymentManager.Tests/ArtifactDeploymentServiceTests.cs:86-90` |
|
||||
|
||||
**Description**
|
||||
@@ -606,6 +676,10 @@ multi-site artifact deployment) was never written — coverage of
|
||||
`DeployToAllSitesAsync` is limited to the no-sites failure case, and
|
||||
`RetryForSiteAsync` and `BuildDeployArtifactsCommandAsync` have no tests at all.
|
||||
|
||||
**Verification:** Confirmed against source. The `CreateCommand()` helper had no
|
||||
callers, and `DeployToAllSitesAsync`/`RetryForSiteAsync` only had the no-sites
|
||||
failure case.
|
||||
|
||||
**Recommendation**
|
||||
|
||||
Either remove the unused helper or, preferably, write the missing tests for
|
||||
@@ -614,4 +688,13 @@ Either remove the unused helper or, preferably, write the missing tests for
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending): took the recommendation's preferred
|
||||
option — removed the dead `CreateCommand()` helper and wrote the missing
|
||||
coverage instead. `ArtifactDeploymentServiceTests` now extends `TestKit` and
|
||||
uses a stand-in `ArtifactProbeActor` (records the `DeployArtifactsCommand`s it
|
||||
receives, replies success or, for a configured failure set, failure) so
|
||||
`DeployToAllSitesAsync` and `RetryForSiteAsync` are exercised end-to-end past
|
||||
the communication boundary. New tests:
|
||||
`DeployToAllSitesAsync_AllPerSiteCommandsShareTheSummaryDeploymentId` (also
|
||||
covers DeploymentManager-010), `DeployToAllSitesAsync_PartialFailure_ReportsPerSiteMatrix`
|
||||
(per-site success/failure matrix), `RetryForSiteAsync_SiteSucceeds_ReturnsSuccessAndAudits`.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 4 |
|
||||
| Open findings | 1 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -534,7 +534,7 @@ exception propagates; it was verified to fail before the `try/catch` was added.
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Status | Open |
|
||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:231-245`, `src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs:90-97` |
|
||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:360-374`, `src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs:169-176` |
|
||||
|
||||
**Description**
|
||||
|
||||
@@ -554,7 +554,26 @@ rather than fetch-all-then-filter.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
2026-05-16 — **Root cause confirmed, but left Open: no correct fix is possible within
|
||||
this module's edit scope.** `ResolveSystemAndMethodAsync`
|
||||
(`ExternalSystemClient.cs:360`) does call `GetAllExternalSystemsAsync()` followed by
|
||||
`GetMethodsByExternalSystemIdAsync()` and filters in memory, and
|
||||
`ResolveConnectionAsync` (`DatabaseGateway.cs:169`) does `GetAllDatabaseConnectionsAsync()`
|
||||
then filters — fetch-all-then-filter on every hot-path call, as described.
|
||||
|
||||
Both recommended fixes require changes outside `src/ScadaLink.ExternalSystemGateway`:
|
||||
(a) a **name-keyed repository lookup** (e.g. `GetExternalSystemByNameAsync`) means adding
|
||||
methods to `IExternalSystemRepository` in `ScadaLink.Commons` and implementing them in
|
||||
`ScadaLink.ConfigurationDatabase` / `ScadaLink.SiteRuntime`; (b) an **in-memory cache
|
||||
invalidated on artifact deployment** requires subscribing to a deployment-applied event
|
||||
owned by `ScadaLink.SiteRuntime` / `ScadaLink.DeploymentManager`. A purely module-local
|
||||
cache with a time-based TTL was rejected as a fix: definitions only change on deployment
|
||||
and must reflect a deployment promptly, so a TTL would either be too short to help the
|
||||
hot path or long enough to serve stale definitions after a redeploy — trading a
|
||||
correctness hazard for a performance gain on a Low-severity issue. **Tracked follow-up:**
|
||||
add a name-keyed lookup to `IExternalSystemRepository` (Commons) and have the gateway use
|
||||
it, or add a deployment-invalidated definition cache wired from SiteRuntime. No source
|
||||
change was made in this module.
|
||||
|
||||
### ExternalSystemGateway-012 — Permanent-failure logging requirement is not met; `_logger` is injected but unused
|
||||
|
||||
@@ -562,7 +581,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:24,169-177`, `src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs:22` |
|
||||
|
||||
**Description**
|
||||
@@ -585,7 +604,21 @@ caller's responsibility and remove the unused `_logger` fields. Add a comment in
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Partial re-triage of the finding text: the
|
||||
"`_logger` injected but unused" claim is **stale** — both loggers are already used (the
|
||||
`DeliverBufferedAsync` retry-sweep handlers added by `ExternalSystemGateway-001` log
|
||||
park/error events). The genuine remaining gap — `InvokeHttpAsync` performing no logging
|
||||
on the HTTP-failure paths — is now fixed: `InvokeHttpAsync` emits a
|
||||
`_logger.LogWarning` on the permanent-failure path (status code, system, method,
|
||||
truncated error body) so a permanent failure is visible in Site Event Logging as the
|
||||
design requires, and a `_logger.LogDebug` on the transient-failure path (transient
|
||||
failures are normal retry/S&F operation and must not be noisy at warning level). A
|
||||
documentation comment was added to `ErrorClassifier.IsTransient(HttpStatusCode)`
|
||||
explaining the "any other non-success status defaults to permanent" behaviour and why
|
||||
permanent is the safe default. Regression tests: `Call_PermanentFailure_LogsAWarning`
|
||||
(asserts a warning carrying the system name is emitted; verified to fail before the
|
||||
`LogWarning` was added) and `Call_TransientFailure_DoesNotLogAtWarningOrAbove` (guards
|
||||
against over-logging transient failures).
|
||||
|
||||
### ExternalSystemGateway-013 — `MaxConcurrentConnectionsPerSystem` and `DefaultHttpTimeout` options are defined but never used
|
||||
|
||||
@@ -593,7 +626,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemGatewayOptions.cs:9,12`, `src/ScadaLink.ExternalSystemGateway/ServiceCollectionExtensions.cs:13` |
|
||||
|
||||
**Description**
|
||||
@@ -614,7 +647,20 @@ options to avoid implying behaviour that does not exist.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Partial re-triage of the finding text:
|
||||
`DefaultHttpTimeout` is **no longer unused** — it became the effective per-call HTTP
|
||||
round-trip limit when `ExternalSystemGateway-002` was fixed (`InvokeHttpAsync` builds a
|
||||
linked `CancellationTokenSource(DefaultHttpTimeout)`). The genuinely-unused option,
|
||||
`MaxConcurrentConnectionsPerSystem`, is now wired in: `AddExternalSystemGateway` adds a
|
||||
`ConfigureHttpClientDefaults` registration that supplies a `SocketsHttpHandler` whose
|
||||
`MaxConnectionsPerServer` is bound from the option, so it applies to every per-system
|
||||
named client (`ExternalSystem_{name}`) the gateway creates rather than being silently
|
||||
ignored. Regression test
|
||||
`ServiceWiringTests.MaxConcurrentConnectionsPerSystem_IsAppliedToTheNamedHttpClientPrimaryHandler`
|
||||
builds the DI container, resolves the named client's handler chain via
|
||||
`IHttpMessageHandlerFactory`, walks to the primary handler and asserts
|
||||
`SocketsHttpHandler.MaxConnectionsPerServer` equals the configured value; it was verified
|
||||
to fail before the wiring was added.
|
||||
|
||||
### ExternalSystemGateway-014 — Cached-call buffering path and `DatabaseGateway` are untested
|
||||
|
||||
@@ -622,8 +668,8 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Status | Open |
|
||||
| Location | `tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs:1`, (no `DatabaseGatewayTests.cs`) |
|
||||
| Status | Resolved |
|
||||
| Location | `tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs:1`, `tests/ScadaLink.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs` |
|
||||
|
||||
**Description**
|
||||
|
||||
@@ -647,4 +693,29 @@ by asserting on the captured `HttpRequestMessage` in the mock handler.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit pending). Partial re-triage of the finding text: a number
|
||||
of the listed gaps were **already closed** by tests added when findings 001–010 were
|
||||
fixed — `DatabaseGatewayTests.cs` now exists (not-found, null-S&F guard, buffered-write
|
||||
delivery), and the `CachedCall` transient/buffering and permanent paths are covered by
|
||||
the `ExternalSystemGateway-001/003/004/008` regression tests. The remaining genuine
|
||||
coverage gaps are now closed with new tests:
|
||||
|
||||
- `Call_GetWithParameters_AppendsEscapedQueryString` — `BuildUrl` GET query-string
|
||||
construction, asserting on the captured request URI and that an `&` inside a value is
|
||||
percent-escaped.
|
||||
- `Call_PostWithParameters_SendsJsonBody` — POST JSON-body construction.
|
||||
- `Call_ApiKeyAuthWithDefaultHeader_SendsXApiKeyHeader`,
|
||||
`Call_ApiKeyAuthWithCustomHeader_SendsNamedHeader`,
|
||||
`Call_BasicAuth_SendsBase64AuthorizationHeader` — `ApplyAuth` for all three auth
|
||||
variants, asserting on the captured request headers.
|
||||
- `Call_ConnectionError_IsClassifiedAsTransient` — a connection-level
|
||||
`HttpRequestException` is classified transient.
|
||||
- `CachedWrite_BuffersTheWriteWithConnectionRetrySettings` and
|
||||
`CachedWrite_ZeroMaxRetriesIsHonouredNotTreatedAsUnset` — the `DatabaseGateway`
|
||||
`CachedWrite` happy-path enqueue against a real S&F service.
|
||||
|
||||
The shared `RequestCapturingHandler` test helper was extended to capture request
|
||||
headers and body so URL/auth/body construction is now verified, not just status codes.
|
||||
These are new-coverage tests against already-correct behaviour, so they pass on the
|
||||
current source; the `BuildUrl` and `ApplyAuth` paths they exercise are now protected
|
||||
against regression.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| Last reviewed | 2026-05-16 |
|
||||
| Reviewer | claude-agent |
|
||||
| Commit reviewed | `9c60592` |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -187,7 +187,7 @@ updates and no torn snapshots. No further code change was required for this find
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.HealthMonitoring/CentralHealthAggregator.cs:146-148`, `src/ScadaLink.HealthMonitoring/SiteHealthState.cs:21`, `src/ScadaLink.HealthMonitoring/ICentralHealthAggregator.cs:16` |
|
||||
|
||||
**Description**
|
||||
@@ -207,7 +207,17 @@ comments, ideally referencing the owning component rather than restating a magic
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Documentation-only — no regression test is
|
||||
meaningful. Verified the authoritative cadence against the Communication module:
|
||||
`SiteCommunicationActor.PreStart` schedules the application-level heartbeat to central
|
||||
at `CommunicationOptions.TransportHeartbeatInterval`, which defaults to **5 seconds**
|
||||
(`CommunicationOptions.cs:49`). The stale "~2s" in `ICentralHealthAggregator.MarkHeartbeat`
|
||||
was corrected; all three XML docs (`ICentralHealthAggregator.MarkHeartbeat`,
|
||||
`SiteHealthState.LastHeartbeatAt`, `CentralHealthAggregator.CheckForOfflineSites`) now
|
||||
state the single authoritative ~5s figure and reference the owning component
|
||||
(`Cluster Infrastructure / SiteCommunicationActor` —
|
||||
`CommunicationOptions.TransportHeartbeatInterval`) rather than restating a bare magic
|
||||
number, so readers can reason about the 60s offline grace.
|
||||
|
||||
### HealthMonitoring-005 — Central self-report site can flap offline; no heartbeat grace like real sites
|
||||
|
||||
@@ -259,7 +269,7 @@ offline after 10 minutes) verify the behaviour.
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.HealthMonitoring/HealthReportSender.cs:28`, `src/ScadaLink.HealthMonitoring/CentralHealthReportLoop.cs:32` |
|
||||
|
||||
**Description**
|
||||
@@ -283,7 +293,20 @@ clock dependency is explicit.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). The `HealthReportSender` XML summary was
|
||||
rewritten to describe the real strategy — sequence numbers are monotonic and
|
||||
restart-resetting but explicitly **not** zero/one-based; they are seeded with the
|
||||
current Unix epoch (ms) so a freshly-active node always sorts after the prior active.
|
||||
Both `HealthReportSender` and `CentralHealthReportLoop` now accept an optional
|
||||
`TimeProvider` (defaulting to `TimeProvider.System`) and derive the seed via
|
||||
`GetUtcNow().ToUnixTimeMilliseconds()` in the constructor body instead of reading
|
||||
`DateTimeOffset.UtcNow` at field initialization, so the seeding is deterministically
|
||||
testable and the clock dependency is explicit. `CentralHealthReportLoop` gained a
|
||||
`CurrentSequenceNumber` test accessor mirroring `HealthReportSender`. Regression tests
|
||||
`HealthReportSenderTests.SequenceNumberSeed_UsesInjectedTimeProvider` and
|
||||
`CentralHealthReportLoopTests.SequenceNumberSeed_UsesInjectedTimeProvider` assert the
|
||||
seed equals the injected provider's Unix-ms instant (these would not compile against
|
||||
the pre-fix code, which had no `TimeProvider` parameter).
|
||||
|
||||
### HealthMonitoring-007 — Heartbeats for not-yet-registered sites are silently dropped
|
||||
|
||||
@@ -430,7 +453,7 @@ The HealthMonitoring test suite now stands at 47 passing tests (was 30).
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.HealthMonitoring/HealthReportSender.cs:70-87` |
|
||||
|
||||
**Description**
|
||||
@@ -451,7 +474,21 @@ the failure so persistent degradation is diagnosable; avoid swallowing
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). All three bare `catch { }` blocks in
|
||||
`HealthReportSender.ExecuteAsync` (cluster-nodes refresh, parked-message-count query,
|
||||
S&F buffer-depth query) were changed to `catch (Exception ex)` and now emit a
|
||||
`LogWarning(ex, ...)` naming the failed operation and the site, so a persistent
|
||||
degradation (broken S&F SQLite store, always-throwing `GetClusterNodes()`) is
|
||||
diagnosable instead of silently shipping stale/zero metrics. On the `OperationCanceledException`
|
||||
concern: verified the inner calls cannot raise OCE from cancellation —
|
||||
`IClusterNodeProvider.GetClusterNodes()` is synchronous and takes no token, and
|
||||
`StoreAndForwardStorage.GetParkedMessageCountAsync()`/`GetBufferDepthByCategoryAsync()`
|
||||
take no `CancellationToken` parameter, so the only cancellation path is the outer loop's
|
||||
`PeriodicTimer.WaitForNextTickAsync(stoppingToken)`, which is unaffected. Regression
|
||||
test `HealthReportSenderTests.ClusterNodeRefreshFailure_IsLoggedNotSwallowed` injects a
|
||||
throwing `IClusterNodeProvider`, asserts the loop still ships reports, and asserts a
|
||||
warning carrying the `InvalidOperationException` is logged — confirmed to fail against
|
||||
the pre-fix bare `catch { }` (logged-entry collection was empty).
|
||||
|
||||
### HealthMonitoring-011 — `AddHealthMonitoringActors` is a dead no-op placeholder
|
||||
|
||||
@@ -459,7 +496,7 @@ _Unresolved._
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.HealthMonitoring/ServiceCollectionExtensions.cs:42-46` |
|
||||
|
||||
**Description**
|
||||
@@ -476,15 +513,23 @@ planned, track it in the design doc instead of a half-method.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Verified a codebase-wide search — the
|
||||
`AddHealthMonitoringActors` no-op extension has no production callers (only references
|
||||
were in the code-review docs themselves). The HealthMonitoring module deliberately
|
||||
contains no actors (transport is abstracted behind `IHealthReportTransport`; actor-side
|
||||
wiring lives in the Communication module), so there is no genuine Phase-4 actor work for
|
||||
this method to host. The dead placeholder was removed from `ServiceCollectionExtensions`
|
||||
so a caller can no longer be misled into believing actor wiring exists. No regression
|
||||
test is meaningful for a deleted no-op; removal is verified by the module continuing to
|
||||
build and all 50 tests passing.
|
||||
|
||||
### HealthMonitoring-012 — `SiteHealthState.LatestReport` initialized to `null!`, misrepresenting the contract
|
||||
|
||||
| | |
|
||||
|--|--|
|
||||
| Severity | Low |
|
||||
| Severity | Low — re-triaged: already resolved as a side-effect of HealthMonitoring-002. |
|
||||
| Category | Documentation & comments |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
| Location | `src/ScadaLink.HealthMonitoring/SiteHealthState.cs:11` |
|
||||
|
||||
**Description**
|
||||
@@ -506,4 +551,12 @@ for HealthMonitoring-007.
|
||||
|
||||
**Resolution**
|
||||
|
||||
_Unresolved._
|
||||
Resolved 2026-05-16 (commit `pending`). Re-triaged: verified against the current
|
||||
source — the root cause was already eliminated by the HealthMonitoring-002 /
|
||||
HealthMonitoring-007 fixes. `SiteHealthState.LatestReport` is already declared
|
||||
`SiteHealthReport? LatestReport { get; init; }` (the recommendation's "make it properly
|
||||
nullable" option) with an XML doc explaining the `null` case ("known only via heartbeats,
|
||||
has not yet sent a report"). A codebase-wide search confirms no `null!` suppression
|
||||
remains anywhere in `src/ScadaLink.HealthMonitoring`. This is exactly the change
|
||||
HealthMonitoring-002 made when converting `SiteHealthState` to an immutable record, so
|
||||
the contract is now honest and no further code change was required.
|
||||
|
||||
+8
-28
@@ -42,8 +42,8 @@ module file and counted in **Total**.
|
||||
| Critical | 0 |
|
||||
| High | 0 |
|
||||
| Medium | 4 |
|
||||
| Low | 66 |
|
||||
| **Total** | **70** |
|
||||
| Low | 46 |
|
||||
| **Total** | **50** |
|
||||
|
||||
## Module Status
|
||||
|
||||
@@ -54,11 +54,11 @@ module file and counted in **Total**.
|
||||
| [ClusterInfrastructure](ClusterInfrastructure/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 8 |
|
||||
| [Commons](Commons/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/1 | 1 | 12 |
|
||||
| [Communication](Communication/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 11 |
|
||||
| [ConfigurationDatabase](ConfigurationDatabase/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/6 | 6 | 11 |
|
||||
| [DataConnectionLayer](DataConnectionLayer/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/2 | 2 | 13 |
|
||||
| [DeploymentManager](DeploymentManager/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/5 | 5 | 14 |
|
||||
| [ExternalSystemGateway](ExternalSystemGateway/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/4 | 4 | 14 |
|
||||
| [HealthMonitoring](HealthMonitoring/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/5 | 5 | 12 |
|
||||
| [ConfigurationDatabase](ConfigurationDatabase/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 11 |
|
||||
| [DataConnectionLayer](DataConnectionLayer/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 13 |
|
||||
| [DeploymentManager](DeploymentManager/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/1 | 1 | 14 |
|
||||
| [ExternalSystemGateway](ExternalSystemGateway/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/1 | 1 | 14 |
|
||||
| [HealthMonitoring](HealthMonitoring/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/0 | 0 | 12 |
|
||||
| [Host](Host/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/7 | 8 | 11 |
|
||||
| [InboundAPI](InboundAPI/findings.md) | 2026-05-16 | `9c60592` | 0/0/1/5 | 6 | 13 |
|
||||
| [ManagementService](ManagementService/findings.md) | 2026-05-16 | `9c60592` | 0/0/0/5 | 5 | 13 |
|
||||
@@ -93,33 +93,13 @@ _None open._
|
||||
| Host-002 | [Host](Host/findings.md) | Akka.Persistence required by REQ-HOST-6 is not configured and not used |
|
||||
| InboundAPI-007 | [InboundAPI](InboundAPI/findings.md) | `Database.Connection()` script API from the design doc is not implemented |
|
||||
|
||||
### Low (66)
|
||||
### Low (46)
|
||||
|
||||
| ID | Module | Title |
|
||||
|----|--------|-------|
|
||||
| Commons-008 | [Commons](Commons/findings.md) | `SetConnectionBindingsCommand` uses `ValueTuple` in a wire message contract |
|
||||
| ConfigurationDatabase-005 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | Audit `Id` type disagrees with the design doc |
|
||||
| ConfigurationDatabase-006 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | `Site.GrpcNodeAAddress` / `GrpcNodeBAddress` columns are unbounded |
|
||||
| ConfigurationDatabase-008 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | `GetApprovedKeysForMethodAsync` CSV parsing silently drops malformed ids |
|
||||
| ConfigurationDatabase-009 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | Multi-collection eager loads issue cartesian-product queries |
|
||||
| ConfigurationDatabase-010 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | Several repositories and `InstanceLocator` lack direct test coverage |
|
||||
| ConfigurationDatabase-011 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | Inconsistent constructor null-guarding across repositories/services |
|
||||
| DataConnectionLayer-008 | [DataConnectionLayer](DataConnectionLayer/findings.md) | `HandleUnsubscribe` is O(n^2) over instances and rechecks `_unresolvedTags` redundantly |
|
||||
| DataConnectionLayer-013 | [DataConnectionLayer](DataConnectionLayer/findings.md) | Misleading XML comment: `RaiseDisconnected` claims thread safety it does not provide |
|
||||
| DeploymentManager-009 | [DeploymentManager](DeploymentManager/findings.md) | Misleading timeout comment on `DeleteInstanceAsync` |
|
||||
| DeploymentManager-010 | [DeploymentManager](DeploymentManager/findings.md) | `SystemArtifactDeploymentRecord` does not persist the deployment ID |
|
||||
| DeploymentManager-012 | [DeploymentManager](DeploymentManager/findings.md) | `LifecycleCommandTimeout` option is dead code |
|
||||
| DeploymentManager-013 | [DeploymentManager](DeploymentManager/findings.md) | SMTP credentials serialized and broadcast to all sites |
|
||||
| DeploymentManager-014 | [DeploymentManager](DeploymentManager/findings.md) | Dead `CreateCommand` helper in artifact tests |
|
||||
| ExternalSystemGateway-011 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | Every call performs a full repository scan of all systems and methods |
|
||||
| ExternalSystemGateway-012 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | Permanent-failure logging requirement is not met; `_logger` is injected but unused |
|
||||
| ExternalSystemGateway-013 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | `MaxConcurrentConnectionsPerSystem` and `DefaultHttpTimeout` options are defined but never used |
|
||||
| ExternalSystemGateway-014 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | Cached-call buffering path and `DatabaseGateway` are untested |
|
||||
| HealthMonitoring-004 | [HealthMonitoring](HealthMonitoring/findings.md) | Inconsistent heartbeat interval described across XML docs |
|
||||
| HealthMonitoring-006 | [HealthMonitoring](HealthMonitoring/findings.md) | Sequence seeding contradicts the doc's "starting at 1" wording and is untestable |
|
||||
| HealthMonitoring-010 | [HealthMonitoring](HealthMonitoring/findings.md) | `HealthReportSender` silently swallows inner failures with bare `catch {}` |
|
||||
| HealthMonitoring-011 | [HealthMonitoring](HealthMonitoring/findings.md) | `AddHealthMonitoringActors` is a dead no-op placeholder |
|
||||
| HealthMonitoring-012 | [HealthMonitoring](HealthMonitoring/findings.md) | `SiteHealthState.LatestReport` initialized to `null!`, misrepresenting the contract |
|
||||
| Host-005 | [Host](Host/findings.md) | Blocking sync-over-async (`GetAwaiter().GetResult()`) inside `StartAsync` |
|
||||
| Host-006 | [Host](Host/findings.md) | HOCON assembled by unescaped string interpolation |
|
||||
| Host-007 | [Host](Host/findings.md) | REQ-HOST-4 rule "GrpcPort ≠ RemotingPort" is not enforced |
|
||||
|
||||
@@ -166,7 +166,7 @@ Template Engine: Update Template
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| **Id** | Long / GUID | Unique identifier for the audit entry. |
|
||||
| **Id** | int (identity) | Surrogate primary key for the audit entry. A 32-bit `int` identity is used deliberately: it matches the key type of every other entity in the schema (uniform repository and query code), and SQL Server identity values are not consumed by failed transactions in a way that materially accelerates exhaustion. At a sustained, unrealistically high rate of 100 audit rows per second the `int` range is not exhausted for roughly 680 years; the indefinite-retention policy does not change that horizon. If a future deployment genuinely approaches the limit, the column can be widened to `bigint` via a migration without a schema redesign. |
|
||||
| **Timestamp** | DateTimeOffset | When the action occurred (UTC). |
|
||||
| **User** | String | Authenticated AD username. |
|
||||
| **Action** | String | The type of operation. |
|
||||
|
||||
@@ -23,6 +23,8 @@ public class SiteConfiguration : IEntityTypeConfiguration<Site>
|
||||
|
||||
builder.Property(s => s.NodeAAddress).HasMaxLength(500);
|
||||
builder.Property(s => s.NodeBAddress).HasMaxLength(500);
|
||||
builder.Property(s => s.GrpcNodeAAddress).HasMaxLength(500);
|
||||
builder.Property(s => s.GrpcNodeBAddress).HasMaxLength(500);
|
||||
|
||||
builder.HasIndex(s => s.Name).IsUnique();
|
||||
builder.HasIndex(s => s.SiteIdentifier).IsUnique();
|
||||
|
||||
src/ScadaLink.ConfigurationDatabase/Migrations/20260517020720_BoundGrpcNodeAddressLength.Designer.cs
Generated
+1350
File diff suppressed because it is too large
Load Diff
+58
@@ -0,0 +1,58 @@
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Migrations
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public partial class BoundGrpcNodeAddressLength : Migration
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void Up(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.AlterColumn<string>(
|
||||
name: "GrpcNodeBAddress",
|
||||
table: "Sites",
|
||||
type: "nvarchar(500)",
|
||||
maxLength: 500,
|
||||
nullable: true,
|
||||
oldClrType: typeof(string),
|
||||
oldType: "nvarchar(max)",
|
||||
oldNullable: true);
|
||||
|
||||
migrationBuilder.AlterColumn<string>(
|
||||
name: "GrpcNodeAAddress",
|
||||
table: "Sites",
|
||||
type: "nvarchar(500)",
|
||||
maxLength: 500,
|
||||
nullable: true,
|
||||
oldClrType: typeof(string),
|
||||
oldType: "nvarchar(max)",
|
||||
oldNullable: true);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void Down(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.AlterColumn<string>(
|
||||
name: "GrpcNodeBAddress",
|
||||
table: "Sites",
|
||||
type: "nvarchar(max)",
|
||||
nullable: true,
|
||||
oldClrType: typeof(string),
|
||||
oldType: "nvarchar(500)",
|
||||
oldMaxLength: 500,
|
||||
oldNullable: true);
|
||||
|
||||
migrationBuilder.AlterColumn<string>(
|
||||
name: "GrpcNodeAAddress",
|
||||
table: "Sites",
|
||||
type: "nvarchar(max)",
|
||||
nullable: true,
|
||||
oldClrType: typeof(string),
|
||||
oldType: "nvarchar(500)",
|
||||
oldMaxLength: 500,
|
||||
oldNullable: true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -830,10 +830,12 @@ namespace ScadaLink.ConfigurationDatabase.Migrations
|
||||
.HasColumnType("nvarchar(2000)");
|
||||
|
||||
b.Property<string>("GrpcNodeAAddress")
|
||||
.HasColumnType("nvarchar(max)");
|
||||
.HasMaxLength(500)
|
||||
.HasColumnType("nvarchar(500)");
|
||||
|
||||
b.Property<string>("GrpcNodeBAddress")
|
||||
.HasColumnType("nvarchar(max)");
|
||||
.HasMaxLength(500)
|
||||
.HasColumnType("nvarchar(500)");
|
||||
|
||||
b.Property<string>("Name")
|
||||
.IsRequired()
|
||||
|
||||
@@ -50,6 +50,7 @@ public class CentralUiRepository : ICentralUiRepository
|
||||
.Include(t => t.Alarms)
|
||||
.Include(t => t.Scripts)
|
||||
.Include(t => t.Compositions)
|
||||
.AsSplitQuery()
|
||||
.OrderBy(t => t.Name)
|
||||
.ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -171,6 +171,7 @@ public class DeploymentManagerRepository : IDeploymentManagerRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.FirstOrDefaultAsync(i => i.Id == instanceId, cancellationToken);
|
||||
}
|
||||
|
||||
@@ -180,6 +181,7 @@ public class DeploymentManagerRepository : IDeploymentManagerRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.FirstOrDefaultAsync(i => i.UniqueName == uniqueName, cancellationToken);
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ public class ExternalSystemRepository : IExternalSystemRepository
|
||||
|
||||
public ExternalSystemRepository(ScadaLinkDbContext context)
|
||||
{
|
||||
_context = context;
|
||||
_context = context ?? throw new ArgumentNullException(nameof(context));
|
||||
}
|
||||
|
||||
public async Task<ExternalSystemDefinition?> GetExternalSystemByIdAsync(int id, CancellationToken cancellationToken = default)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ScadaLink.Commons.Entities.InboundApi;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
|
||||
@@ -7,10 +9,12 @@ namespace ScadaLink.ConfigurationDatabase.Repositories;
|
||||
public class InboundApiRepository : IInboundApiRepository
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly ILogger<InboundApiRepository> _logger;
|
||||
|
||||
public InboundApiRepository(ScadaLinkDbContext context)
|
||||
public InboundApiRepository(ScadaLinkDbContext context, ILogger<InboundApiRepository>? logger = null)
|
||||
{
|
||||
_context = context;
|
||||
_context = context ?? throw new ArgumentNullException(nameof(context));
|
||||
_logger = logger ?? NullLogger<InboundApiRepository>.Instance;
|
||||
}
|
||||
|
||||
public async Task<ApiKey?> GetApiKeyByIdAsync(int id, CancellationToken cancellationToken = default)
|
||||
@@ -49,10 +53,26 @@ public class InboundApiRepository : IInboundApiRepository
|
||||
if (method?.ApprovedApiKeyIds == null)
|
||||
return new List<ApiKey>();
|
||||
|
||||
var keyIds = method.ApprovedApiKeyIds.Split(',', StringSplitOptions.RemoveEmptyEntries)
|
||||
.Select(s => int.TryParse(s.Trim(), out var id) ? id : -1)
|
||||
.Where(id => id > 0)
|
||||
.ToList();
|
||||
// ApprovedApiKeyIds is a comma-separated string of integer ApiKey ids. A token that
|
||||
// fails to parse indicates a corrupt value: it is dropped (it cannot identify a key),
|
||||
// but the corruption is logged as a warning so it is observable rather than silent.
|
||||
// A corrupt list would otherwise quietly approve fewer keys than intended.
|
||||
var keyIds = new List<int>();
|
||||
foreach (var token in method.ApprovedApiKeyIds.Split(',', StringSplitOptions.RemoveEmptyEntries))
|
||||
{
|
||||
var trimmed = token.Trim();
|
||||
if (int.TryParse(trimmed, out var id) && id > 0)
|
||||
{
|
||||
keyIds.Add(id);
|
||||
}
|
||||
else
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"ApiMethod {MethodId} has a malformed approved-API-key id token '{Token}' " +
|
||||
"in ApprovedApiKeyIds; it was dropped. The method may approve fewer keys than expected.",
|
||||
methodId, trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
return await _context.Set<ApiKey>().Where(k => keyIds.Contains(k.Id)).ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ public class NotificationRepository : INotificationRepository
|
||||
|
||||
public NotificationRepository(ScadaLinkDbContext context)
|
||||
{
|
||||
_context = context;
|
||||
_context = context ?? throw new ArgumentNullException(nameof(context));
|
||||
}
|
||||
|
||||
public async Task<NotificationList?> GetNotificationListByIdAsync(int id, CancellationToken cancellationToken = default)
|
||||
|
||||
@@ -24,6 +24,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(t => t.Alarms)
|
||||
.Include(t => t.Scripts)
|
||||
.Include(t => t.Compositions)
|
||||
.AsSplitQuery()
|
||||
.FirstOrDefaultAsync(t => t.Id == id, cancellationToken);
|
||||
}
|
||||
|
||||
@@ -45,6 +46,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(t => t.Alarms)
|
||||
.Include(t => t.Scripts)
|
||||
.Include(t => t.Compositions)
|
||||
.AsSplitQuery()
|
||||
.ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -55,6 +57,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(t => t.Attributes)
|
||||
.Include(t => t.Scripts)
|
||||
.Include(t => t.Compositions)
|
||||
.AsSplitQuery()
|
||||
.ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -222,6 +225,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.FirstOrDefaultAsync(i => i.Id == id, cancellationToken);
|
||||
}
|
||||
|
||||
@@ -231,6 +235,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -248,6 +253,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.ToListAsync(cancellationToken);
|
||||
}
|
||||
|
||||
@@ -257,6 +263,7 @@ public class TemplateEngineRepository : ITemplateEngineRepository
|
||||
.Include(i => i.AttributeOverrides)
|
||||
.Include(i => i.AlarmOverrides)
|
||||
.Include(i => i.ConnectionBindings)
|
||||
.AsSplitQuery()
|
||||
.FirstOrDefaultAsync(i => i.UniqueName == uniqueName, cancellationToken);
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
</PackageReference>
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Json" />
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="Microsoft.Extensions.Options" />
|
||||
<PackageReference Include="Microsoft.AspNetCore.DataProtection" />
|
||||
<PackageReference Include="Microsoft.AspNetCore.DataProtection.EntityFrameworkCore" />
|
||||
|
||||
@@ -12,7 +12,7 @@ public class InstanceLocator : IInstanceLocator
|
||||
|
||||
public InstanceLocator(ScadaLinkDbContext context)
|
||||
{
|
||||
_context = context;
|
||||
_context = context ?? throw new ArgumentNullException(nameof(context));
|
||||
}
|
||||
|
||||
public async Task<string?> GetSiteIdForInstanceAsync(
|
||||
|
||||
@@ -50,6 +50,13 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
/// </summary>
|
||||
private readonly Dictionary<string, string> _subscriptionIds = new();
|
||||
|
||||
/// <summary>
|
||||
/// DataConnectionLayer-008: reverse index of how many instances subscribe to each
|
||||
/// tag path. Lets <see cref="HandleUnsubscribe"/> decide whether any other instance
|
||||
/// still needs a tag in O(1) instead of scanning every instance's tag set.
|
||||
/// </summary>
|
||||
private readonly Dictionary<string, int> _tagSubscriberCount = new();
|
||||
|
||||
/// <summary>
|
||||
/// Tags whose path resolution failed and are awaiting retry.
|
||||
/// </summary>
|
||||
@@ -600,7 +607,12 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
|
||||
foreach (var result in msg.Results)
|
||||
{
|
||||
instanceTags.Add(result.TagPath);
|
||||
// DataConnectionLayer-008: only a tag newly added to THIS instance's set
|
||||
// increments the reference count, so the count stays an accurate "number
|
||||
// of distinct instances subscribed to this tag".
|
||||
if (instanceTags.Add(result.TagPath))
|
||||
_tagSubscriberCount[result.TagPath] =
|
||||
_tagSubscriberCount.GetValueOrDefault(result.TagPath) + 1;
|
||||
|
||||
// Re-check against current state: another subscribe may have resolved the
|
||||
// same tag while this request's I/O was in flight.
|
||||
@@ -687,20 +699,29 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
// WP-14: Cleanup on Instance Actor stop
|
||||
foreach (var tagPath in tags)
|
||||
{
|
||||
// Check if any other instance is still subscribed to this tag
|
||||
var otherSubscribers = _subscriptionsByInstance
|
||||
.Where(kvp => kvp.Key != request.InstanceUniqueName && kvp.Value.Contains(tagPath))
|
||||
.Any();
|
||||
// DataConnectionLayer-008: drop this instance's reference; the tag is only
|
||||
// released at the adapter when no other instance still subscribes to it.
|
||||
// The reference count makes this O(1) instead of an O(instances) scan.
|
||||
var remaining = _tagSubscriberCount.GetValueOrDefault(tagPath) - 1;
|
||||
if (remaining > 0)
|
||||
{
|
||||
_tagSubscriberCount[tagPath] = remaining;
|
||||
continue;
|
||||
}
|
||||
_tagSubscriberCount.Remove(tagPath);
|
||||
|
||||
if (!otherSubscribers && _subscriptionIds.TryGetValue(tagPath, out var subId))
|
||||
// Last subscriber gone. A tag with a subscription id is a resolved tag;
|
||||
// an unresolved tag never has a subscription id, so reaching this branch
|
||||
// via TryGetValue means the tag was resolved — decrement _resolvedTags
|
||||
// unconditionally (the previous `!_unresolvedTags.Contains` re-check after
|
||||
// an unconditional Remove was always-true dead logic).
|
||||
if (_subscriptionIds.TryGetValue(tagPath, out var subId))
|
||||
{
|
||||
_ = _adapter.UnsubscribeAsync(subId);
|
||||
_subscriptionIds.Remove(tagPath);
|
||||
_unresolvedTags.Remove(tagPath);
|
||||
_resolutionInFlight.Remove(tagPath);
|
||||
_totalSubscribed--;
|
||||
if (!_unresolvedTags.Contains(tagPath))
|
||||
_resolvedTags--;
|
||||
_resolvedTags--;
|
||||
|
||||
// DataConnectionLayer-006: drop the tag's tracked quality so it is no
|
||||
// longer counted by PushBadQualityForAllTags (which sets _tagsBadQuality
|
||||
@@ -716,6 +737,16 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (_unresolvedTags.Remove(tagPath))
|
||||
{
|
||||
// Last subscriber gone for a tag that had never resolved: stop
|
||||
// retrying it and drop it from the subscribed total. The previous
|
||||
// implementation never reached this case (its guard required a
|
||||
// subscription id), so an unresolved tag leaked into the retry timer
|
||||
// and TotalSubscribedTags forever after its instance unsubscribed.
|
||||
_resolutionInFlight.Remove(tagPath);
|
||||
_totalSubscribed--;
|
||||
}
|
||||
}
|
||||
|
||||
_subscriptionsByInstance.Remove(request.InstanceUniqueName);
|
||||
|
||||
@@ -38,7 +38,12 @@ public class OpcUaDataConnection : IDataConnection
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
private volatile bool _disconnectFired;
|
||||
// DataConnectionLayer-013: an int flag toggled with Interlocked.Exchange so the
|
||||
// "only the first caller fires Disconnected" guard in RaiseDisconnected is genuinely
|
||||
// atomic. A plain volatile bool gives visibility but not atomicity — two threads
|
||||
// (e.g. the keep-alive thread and a ReadAsync failure path) could both observe it
|
||||
// false and both raise the event. 0 = not fired, 1 = fired.
|
||||
private int _disconnectFired;
|
||||
|
||||
public ConnectionHealth Status => _status;
|
||||
public event Action? Disconnected;
|
||||
@@ -82,7 +87,7 @@ public class OpcUaDataConnection : IDataConnection
|
||||
await _client.ConnectAsync(_endpointUrl, options, cancellationToken);
|
||||
|
||||
_status = ConnectionHealth.Connected;
|
||||
_disconnectFired = false;
|
||||
Interlocked.Exchange(ref _disconnectFired, 0);
|
||||
_logger.LogInformation("OPC UA connected to {Endpoint}", _endpointUrl);
|
||||
|
||||
await StartHeartbeatMonitorAsync(config.Heartbeat, cancellationToken);
|
||||
@@ -285,12 +290,15 @@ public class OpcUaDataConnection : IDataConnection
|
||||
|
||||
/// <summary>
|
||||
/// Marks the connection as disconnected and fires the Disconnected event once.
|
||||
/// Thread-safe: only the first caller triggers the event.
|
||||
/// Thread-safe: the firing guard is an atomic compare-and-set
|
||||
/// (<see cref="Interlocked.Exchange(ref int, int)"/>), so when several threads race
|
||||
/// here — e.g. the keep-alive thread via <see cref="OnClientConnectionLost"/> and a
|
||||
/// <c>ReadAsync</c> failure path — exactly one of them observes the 0→1 transition
|
||||
/// and invokes <see cref="Disconnected"/>.
|
||||
/// </summary>
|
||||
private void RaiseDisconnected()
|
||||
{
|
||||
if (_disconnectFired) return;
|
||||
_disconnectFired = true;
|
||||
if (Interlocked.Exchange(ref _disconnectFired, 1) != 0) return;
|
||||
_status = ConnectionHealth.Disconnected;
|
||||
_logger.LogWarning("OPC UA connection to {Endpoint} lost", _endpointUrl);
|
||||
Disconnected?.Invoke();
|
||||
|
||||
@@ -24,7 +24,10 @@ public class RealOpcUaClient : IOpcUaClient
|
||||
// Clear() is undefined behaviour, so they must be ConcurrentDictionary.
|
||||
private readonly ConcurrentDictionary<string, MonitoredItem> _monitoredItems = new();
|
||||
private readonly ConcurrentDictionary<string, Action<string, object?, DateTime, uint>> _callbacks = new();
|
||||
private volatile bool _connectionLostFired;
|
||||
// DataConnectionLayer-013: int flag toggled with Interlocked.Exchange so the
|
||||
// once-only ConnectionLost guard in OnSessionKeepAlive is atomic, not just visible.
|
||||
// 0 = not fired, 1 = fired.
|
||||
private int _connectionLostFired;
|
||||
private OpcUaConnectionOptions _options = new();
|
||||
private readonly OpcUaGlobalOptions _globalOptions;
|
||||
private readonly ILogger<RealOpcUaClient> _logger;
|
||||
@@ -112,7 +115,7 @@ public class RealOpcUaClient : IOpcUaClient
|
||||
"ScadaLink-DCL-Session", (uint)opts.SessionTimeoutMs, userIdentity, null, cancellationToken);
|
||||
|
||||
// Detect server going offline via keep-alive failures
|
||||
_connectionLostFired = false;
|
||||
Interlocked.Exchange(ref _connectionLostFired, 0);
|
||||
_session.KeepAlive += OnSessionKeepAlive;
|
||||
|
||||
// Store options for monitored item creation
|
||||
@@ -243,14 +246,15 @@ public class RealOpcUaClient : IOpcUaClient
|
||||
|
||||
/// <summary>
|
||||
/// Called by the OPC UA SDK when a keep-alive response arrives (or fails).
|
||||
/// When CurrentState is bad, the server is unreachable.
|
||||
/// When CurrentState is bad, the server is unreachable. The once-only guard is an
|
||||
/// atomic compare-and-set, so a burst of failed keep-alives raises
|
||||
/// <see cref="ConnectionLost"/> exactly once.
|
||||
/// </summary>
|
||||
private void OnSessionKeepAlive(ISession session, KeepAliveEventArgs e)
|
||||
{
|
||||
if (ServiceResult.IsBad(e.Status))
|
||||
{
|
||||
if (_connectionLostFired) return;
|
||||
_connectionLostFired = true;
|
||||
if (Interlocked.Exchange(ref _connectionLostFired, 1) != 0) return;
|
||||
ConnectionLost?.Invoke();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,9 +58,17 @@ public class ArtifactDeploymentService
|
||||
/// Collects all artifact types from repositories and builds a <see cref="DeployArtifactsCommand"/>
|
||||
/// scoped to a specific site's data connections.
|
||||
/// </summary>
|
||||
/// <param name="siteId">The DB id of the site whose data connections are collected.</param>
|
||||
/// <param name="deploymentId">
|
||||
/// DeploymentManager-010: the logical deployment id for this artifact deployment. All per-site
|
||||
/// commands of one <see cref="DeployToAllSitesAsync"/> call share this id so the audit log,
|
||||
/// UI summary, and persisted record correlate. When <c>null</c> a fresh id is minted (used by
|
||||
/// single-site retries).
|
||||
/// </param>
|
||||
public async Task<DeployArtifactsCommand> BuildDeployArtifactsCommandAsync(
|
||||
int siteId,
|
||||
CancellationToken cancellationToken = default)
|
||||
CancellationToken cancellationToken = default,
|
||||
string? deploymentId = null)
|
||||
{
|
||||
var sharedScripts = await _templateRepo.GetAllSharedScriptsAsync(cancellationToken);
|
||||
var externalSystems = await _externalSystemRepo.GetAllExternalSystemsAsync(cancellationToken);
|
||||
@@ -111,7 +119,7 @@ public class ArtifactDeploymentService
|
||||
smtp.Credentials, null, smtp.TlsMode)).ToList();
|
||||
|
||||
return new DeployArtifactsCommand(
|
||||
Guid.NewGuid().ToString("N"),
|
||||
deploymentId ?? Guid.NewGuid().ToString("N"),
|
||||
scriptArtifacts,
|
||||
externalSystemArtifacts,
|
||||
dbConnectionArtifacts,
|
||||
@@ -136,11 +144,15 @@ public class ArtifactDeploymentService
|
||||
var deploymentId = Guid.NewGuid().ToString("N");
|
||||
var perSiteResults = new Dictionary<string, SiteArtifactResult>();
|
||||
|
||||
// Build per-site commands sequentially (DbContext is not thread-safe)
|
||||
// Build per-site commands sequentially (DbContext is not thread-safe).
|
||||
// DeploymentManager-010: every per-site command carries the SAME logical
|
||||
// deploymentId, so the per-site commands, audit log, persisted record,
|
||||
// and UI summary all reference one id instead of N+1 unrelated GUIDs.
|
||||
var siteCommands = new Dictionary<int, DeployArtifactsCommand>();
|
||||
foreach (var site in sites)
|
||||
{
|
||||
siteCommands[site.Id] = await BuildDeployArtifactsCommandAsync(site.Id, cancellationToken);
|
||||
siteCommands[site.Id] = await BuildDeployArtifactsCommandAsync(
|
||||
site.Id, cancellationToken, deploymentId);
|
||||
}
|
||||
|
||||
// Deploy to each site in parallel with per-site timeout
|
||||
@@ -190,11 +202,20 @@ public class ArtifactDeploymentService
|
||||
perSiteResults[result.SiteId] = result;
|
||||
}
|
||||
|
||||
// Persist the system artifact deployment record
|
||||
// Persist the system artifact deployment record.
|
||||
// DeploymentManager-010: SystemArtifactDeploymentRecord has no dedicated
|
||||
// DeploymentId column (adding one is a Commons/ConfigurationDatabase
|
||||
// schema change outside this module). The logical deploymentId is
|
||||
// embedded in the PerSiteStatus payload so the persisted record can be
|
||||
// correlated with the audit log and UI summary that report the same id.
|
||||
var record = new SystemArtifactDeploymentRecord("Artifacts", user)
|
||||
{
|
||||
DeployedAt = DateTimeOffset.UtcNow,
|
||||
PerSiteStatus = JsonSerializer.Serialize(perSiteResults)
|
||||
PerSiteStatus = JsonSerializer.Serialize(new
|
||||
{
|
||||
DeploymentId = deploymentId,
|
||||
Sites = perSiteResults
|
||||
})
|
||||
};
|
||||
await _deploymentRepo.AddSystemArtifactDeploymentAsync(record, cancellationToken);
|
||||
await _deploymentRepo.SaveChangesAsync(cancellationToken);
|
||||
|
||||
@@ -5,7 +5,11 @@ namespace ScadaLink.DeploymentManager;
|
||||
/// </summary>
|
||||
public class DeploymentManagerOptions
|
||||
{
|
||||
/// <summary>Timeout for lifecycle commands sent to sites (disable, enable, delete).</summary>
|
||||
/// <summary>
|
||||
/// WP-6: Timeout for a lifecycle command round-trip (disable, enable, delete).
|
||||
/// Applied as a linked-CTS deadline in <c>DeploymentService</c> so a hung or
|
||||
/// unreachable site does not hold the per-instance operation lock indefinitely.
|
||||
/// </summary>
|
||||
public TimeSpan LifecycleCommandTimeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>WP-7: Timeout per site for system-wide artifact deployment.</summary>
|
||||
|
||||
@@ -302,7 +302,21 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new DisableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.DisableInstanceAsync(siteId, command, cancellationToken);
|
||||
// WP-6: bound the round-trip with the configured lifecycle timeout so a
|
||||
// hung/unreachable site does not block the operation lock indefinitely.
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.DisableInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Disable of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Disable failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
@@ -343,7 +357,20 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new EnableInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.EnableInstanceAsync(siteId, command, cancellationToken);
|
||||
// WP-6: bound the round-trip with the configured lifecycle timeout.
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.EnableInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Enable of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Enable failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
@@ -365,7 +392,9 @@ public class DeploymentService
|
||||
/// WP-6: Delete an instance. Stops the site actor, removes site config, and
|
||||
/// removes the central instance record (deployment history, snapshot,
|
||||
/// overrides, and connection bindings go with it). S&F NOT cleared.
|
||||
/// Delete fails if site unreachable (30s timeout via CommunicationOptions).
|
||||
/// Delete fails if the site is unreachable within
|
||||
/// <c>CommunicationOptions.LifecycleTimeout</c> (applied inside
|
||||
/// <see cref="CommunicationService.DeleteInstanceAsync"/>).
|
||||
/// </summary>
|
||||
public async Task<Result<InstanceLifecycleResponse>> DeleteInstanceAsync(
|
||||
int instanceId,
|
||||
@@ -387,7 +416,20 @@ public class DeploymentService
|
||||
var siteId = await ResolveSiteIdentifierAsync(instance.SiteId, cancellationToken);
|
||||
var command = new DeleteInstanceCommand(commandId, instance.UniqueName, DateTimeOffset.UtcNow);
|
||||
|
||||
var response = await _communicationService.DeleteInstanceAsync(siteId, command, cancellationToken);
|
||||
// WP-6: bound the round-trip with the configured lifecycle timeout.
|
||||
InstanceLifecycleResponse response;
|
||||
try
|
||||
{
|
||||
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
cts.CancelAfter(_options.LifecycleCommandTimeout);
|
||||
response = await _communicationService.DeleteInstanceAsync(siteId, command, cts.Token);
|
||||
}
|
||||
catch (Exception ex) when (ex is TimeoutException or OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Delete of instance {Instance} timed out", instance.UniqueName);
|
||||
return Result<InstanceLifecycleResponse>.Failure(
|
||||
$"Delete failed: the site did not respond within {_options.LifecycleCommandTimeout}.");
|
||||
}
|
||||
|
||||
if (response.Success)
|
||||
{
|
||||
|
||||
@@ -11,6 +11,10 @@ public static class ErrorClassifier
|
||||
{
|
||||
/// <summary>
|
||||
/// Determines whether an HTTP status code represents a transient failure.
|
||||
/// Transient: HTTP 5xx, 408 (Request Timeout) and 429 (Too Many Requests).
|
||||
/// Every other non-success status (the remaining 4xx) defaults to permanent —
|
||||
/// a permanent failure is the safe default because retrying a 4xx is unlikely to
|
||||
/// succeed and risks duplicate side effects.
|
||||
/// </summary>
|
||||
public static bool IsTransient(HttpStatusCode statusCode)
|
||||
{
|
||||
|
||||
@@ -272,10 +272,21 @@ public class ExternalSystemClient : IExternalSystemClient
|
||||
|
||||
if (ErrorClassifier.IsTransient(response.StatusCode))
|
||||
{
|
||||
// Transient failures are normal operation (handled by retry / S&F) —
|
||||
// record at debug level only so the event log is not noisy.
|
||||
_logger.LogDebug(
|
||||
"Transient HTTP {StatusCode} from external system {System} calling {Method}.",
|
||||
(int)response.StatusCode, system.Name, method.Name);
|
||||
throw ErrorClassifier.AsTransient(
|
||||
$"HTTP {(int)response.StatusCode} from {system.Name}: {errorBody}");
|
||||
}
|
||||
|
||||
// The design requires permanent failures to be visible in Site Event
|
||||
// Logging — emit a warning so the gateway is not silent on a permanent
|
||||
// failure (ExternalSystemGateway-012).
|
||||
_logger.LogWarning(
|
||||
"Permanent HTTP {StatusCode} from external system {System} calling {Method}: {Error}",
|
||||
(int)response.StatusCode, system.Name, method.Name, errorBody);
|
||||
throw new PermanentExternalSystemException(
|
||||
$"HTTP {(int)response.StatusCode} from {system.Name}: {errorBody}",
|
||||
(int)response.StatusCode);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
|
||||
namespace ScadaLink.ExternalSystemGateway;
|
||||
@@ -11,6 +12,22 @@ public static class ServiceCollectionExtensions
|
||||
.BindConfiguration("ScadaLink:ExternalSystemGateway");
|
||||
|
||||
services.AddHttpClient();
|
||||
|
||||
// ExternalSystemGateway-013: wire MaxConcurrentConnectionsPerSystem into the
|
||||
// primary handler of every per-system named client ("ExternalSystem_{name}"),
|
||||
// so the option an operator configures actually bounds concurrent connections
|
||||
// instead of being silently ignored. ConfigureHttpClientDefaults applies to
|
||||
// the dynamically-named clients created by ExternalSystemClient.
|
||||
services.ConfigureHttpClientDefaults(builder =>
|
||||
builder.ConfigurePrimaryHttpMessageHandler(sp =>
|
||||
{
|
||||
var options = sp.GetRequiredService<IOptions<ExternalSystemGatewayOptions>>().Value;
|
||||
return new SocketsHttpHandler
|
||||
{
|
||||
MaxConnectionsPerServer = options.MaxConcurrentConnectionsPerSystem,
|
||||
};
|
||||
}));
|
||||
|
||||
services.AddScoped<ExternalSystemClient>();
|
||||
services.AddScoped<IExternalSystemClient>(sp => sp.GetRequiredService<ExternalSystemClient>());
|
||||
services.AddScoped<DatabaseGateway>();
|
||||
|
||||
@@ -210,10 +210,12 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
|
||||
var state = kvp.Value;
|
||||
if (!state.IsOnline) continue;
|
||||
|
||||
// Use LastHeartbeatAt — heartbeats arrive frequently from any
|
||||
// Use LastHeartbeatAt — heartbeats arrive every ~5s from any
|
||||
// healthy site node (cadence owned by Cluster Infrastructure /
|
||||
// SiteCommunicationActor), so OfflineTimeout only fires when no
|
||||
// node can reach central, not during single-node failovers.
|
||||
// SiteCommunicationActor — CommunicationOptions.TransportHeartbeatInterval),
|
||||
// so the 60s OfflineTimeout tolerates several missed heartbeats and
|
||||
// only fires when no node can reach central, not during single-node
|
||||
// failovers.
|
||||
//
|
||||
// The synthetic "central" site has no heartbeat source — its only
|
||||
// signal is the 30s CentralHealthReportLoop self-report — so it gets
|
||||
|
||||
@@ -29,22 +29,31 @@ public class CentralHealthReportLoop : BackgroundService
|
||||
|
||||
// Seeded with Unix-ms so reports from a newly-elected central leader
|
||||
// always sort after reports from any prior leader for siteId="central".
|
||||
private long _sequenceNumber = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
|
||||
// The clock is read through the injected TimeProvider so the seeding is
|
||||
// deterministically testable.
|
||||
private long _sequenceNumber;
|
||||
|
||||
public CentralHealthReportLoop(
|
||||
ISiteHealthCollector collector,
|
||||
ICentralHealthAggregator aggregator,
|
||||
IClusterNodeProvider clusterNodeProvider,
|
||||
IOptions<HealthMonitoringOptions> options,
|
||||
ILogger<CentralHealthReportLoop> logger)
|
||||
ILogger<CentralHealthReportLoop> logger,
|
||||
TimeProvider? timeProvider = null)
|
||||
{
|
||||
_collector = collector;
|
||||
_aggregator = aggregator;
|
||||
_clusterNodeProvider = clusterNodeProvider;
|
||||
_options = options.Value;
|
||||
_logger = logger;
|
||||
_sequenceNumber = (timeProvider ?? TimeProvider.System).GetUtcNow().ToUnixTimeMilliseconds();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Current sequence number (for testing).
|
||||
/// </summary>
|
||||
public long CurrentSequenceNumber => Interlocked.Read(ref _sequenceNumber);
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
|
||||
@@ -8,7 +8,12 @@ namespace ScadaLink.HealthMonitoring;
|
||||
|
||||
/// <summary>
|
||||
/// Periodically collects a SiteHealthReport and sends it to central via Akka remoting.
|
||||
/// Sequence numbers are monotonic, starting at 1, and reset on service restart.
|
||||
/// Sequence numbers are monotonic and reset on service restart. They are <b>not</b>
|
||||
/// zero/one-based: the per-process counter is seeded with the current Unix epoch
|
||||
/// (milliseconds) at construction so that, after a failover, reports from a
|
||||
/// freshly-active node always sort after reports from any prior active node for the
|
||||
/// same site — otherwise the central aggregator's sequence-number guard would
|
||||
/// silently reject the new active's first reports as stale.
|
||||
/// </summary>
|
||||
public class HealthReportSender : BackgroundService
|
||||
{
|
||||
@@ -24,8 +29,9 @@ public class HealthReportSender : BackgroundService
|
||||
// node always sort after reports from any prior active node for the same
|
||||
// site. Without this seeding, failover would silently drop the new
|
||||
// active's first reports because their per-process counter starts below
|
||||
// the prior active's last sequence number.
|
||||
private long _sequenceNumber = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
|
||||
// the prior active's last sequence number. The clock is read through the
|
||||
// injected TimeProvider so the seeding is deterministically testable.
|
||||
private long _sequenceNumber;
|
||||
|
||||
public HealthReportSender(
|
||||
ISiteHealthCollector collector,
|
||||
@@ -34,7 +40,8 @@ public class HealthReportSender : BackgroundService
|
||||
ILogger<HealthReportSender> logger,
|
||||
ISiteIdentityProvider siteIdentityProvider,
|
||||
StoreAndForwardStorage? sfStorage = null,
|
||||
IClusterNodeProvider? clusterNodeProvider = null)
|
||||
IClusterNodeProvider? clusterNodeProvider = null,
|
||||
TimeProvider? timeProvider = null)
|
||||
{
|
||||
_collector = collector;
|
||||
_transport = transport;
|
||||
@@ -43,6 +50,7 @@ public class HealthReportSender : BackgroundService
|
||||
_siteId = siteIdentityProvider.SiteId;
|
||||
_sfStorage = sfStorage;
|
||||
_clusterNodeProvider = clusterNodeProvider;
|
||||
_sequenceNumber = (timeProvider ?? TimeProvider.System).GetUtcNow().ToUnixTimeMilliseconds();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -73,7 +81,14 @@ public class HealthReportSender : BackgroundService
|
||||
{
|
||||
_collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes());
|
||||
}
|
||||
catch { /* Non-fatal */ }
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Non-fatal — the report ships with the previous cluster
|
||||
// node list. Logged so a persistent failure is diagnosable.
|
||||
_logger.LogWarning(ex,
|
||||
"Failed to refresh cluster nodes for health report (site {SiteId}); using stale list",
|
||||
_siteId);
|
||||
}
|
||||
}
|
||||
|
||||
if (_sfStorage != null)
|
||||
@@ -83,7 +98,13 @@ public class HealthReportSender : BackgroundService
|
||||
var parkedCount = await _sfStorage.GetParkedMessageCountAsync();
|
||||
_collector.SetParkedMessageCount(parkedCount);
|
||||
}
|
||||
catch { /* Non-fatal — parked count will be 0 */ }
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Non-fatal — parked count will be 0 in this report.
|
||||
_logger.LogWarning(ex,
|
||||
"Failed to query parked message count for health report (site {SiteId})",
|
||||
_siteId);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
@@ -97,7 +118,13 @@ public class HealthReportSender : BackgroundService
|
||||
kvp => kvp.Value);
|
||||
_collector.SetStoreAndForwardDepths(depths);
|
||||
}
|
||||
catch { /* Non-fatal — buffer depths will be empty */ }
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Non-fatal — buffer depths will be empty in this report.
|
||||
_logger.LogWarning(ex,
|
||||
"Failed to query store-and-forward buffer depths for health report (site {SiteId})",
|
||||
_siteId);
|
||||
}
|
||||
}
|
||||
|
||||
var seq = Interlocked.Increment(ref _sequenceNumber);
|
||||
|
||||
@@ -13,9 +13,14 @@ public interface ICentralHealthAggregator
|
||||
/// <summary>
|
||||
/// Bumps the last-seen timestamp for a site, keeping it marked online
|
||||
/// between full 30s reports when heartbeats are arriving — protects against
|
||||
/// the offline threshold firing on a transiently delayed report. A heartbeat
|
||||
/// for a site with no aggregator state yet (e.g. just after a central
|
||||
/// restart/failover) registers that site as online with no
|
||||
/// the offline threshold firing on a transiently delayed report. Heartbeat
|
||||
/// cadence is owned by the Cluster Infrastructure / <c>SiteCommunicationActor</c>
|
||||
/// (the application-level heartbeat to central, sent every
|
||||
/// <c>CommunicationOptions.TransportHeartbeatInterval</c> — 5s by default);
|
||||
/// the 60s <see cref="HealthMonitoringOptions.OfflineTimeout"/> therefore
|
||||
/// tolerates several missed heartbeats. A heartbeat for a site with no
|
||||
/// aggregator state yet (e.g. just after a central restart/failover)
|
||||
/// registers that site as online with no
|
||||
/// <see cref="SiteHealthState.LatestReport"/>, so reachable sites are not
|
||||
/// shown as "unknown" during the failover window.
|
||||
/// </summary>
|
||||
|
||||
@@ -38,10 +38,4 @@ public static class ServiceCollectionExtensions
|
||||
services.AddHostedService<CentralHealthReportLoop>();
|
||||
return services;
|
||||
}
|
||||
|
||||
public static IServiceCollection AddHealthMonitoringActors(this IServiceCollection services)
|
||||
{
|
||||
// Placeholder for Akka actor registration (Phase 4+)
|
||||
return services;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,8 +30,9 @@ public sealed record SiteHealthState
|
||||
/// Time the most recent signal of any kind (full report OR heartbeat) was
|
||||
/// received. Drives offline detection — heartbeats from the standby keep the
|
||||
/// site marked online even when the active node is unable to produce a report
|
||||
/// (mid-failover, brief stalls). See the heartbeat scheduler owned by the
|
||||
/// Cluster Infrastructure / SiteCommunicationActor for the actual cadence.
|
||||
/// (mid-failover, brief stalls). Heartbeat cadence is owned by the Cluster
|
||||
/// Infrastructure / SiteCommunicationActor (every
|
||||
/// CommunicationOptions.TransportHeartbeatInterval — 5s by default).
|
||||
/// </summary>
|
||||
public DateTimeOffset LastHeartbeatAt { get; init; }
|
||||
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.InboundApi;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Tests;
|
||||
|
||||
public class InboundApiRepositoryTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly CapturingLogger<InboundApiRepository> _logger = new();
|
||||
private readonly InboundApiRepository _repository;
|
||||
|
||||
public InboundApiRepositoryTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new InboundApiRepository(_context, _logger);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddApiKey_AndGetById_RoundTrips()
|
||||
{
|
||||
var key = new ApiKey("Key1", "secret-value-1") { IsEnabled = true };
|
||||
await _repository.AddApiKeyAsync(key);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetApiKeyByIdAsync(key.Id);
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal("Key1", loaded!.Name);
|
||||
|
||||
var byValue = await _repository.GetApiKeyByValueAsync("secret-value-1");
|
||||
Assert.NotNull(byValue);
|
||||
Assert.Equal(key.Id, byValue!.Id);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddApiMethod_AndGetByName_RoundTrips()
|
||||
{
|
||||
var method = new ApiMethod("DoThing", "return 1;");
|
||||
await _repository.AddApiMethodAsync(method);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetMethodByNameAsync("DoThing");
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal(method.Id, loaded!.Id);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetApprovedKeysForMethod_WithValidCsv_ReturnsAllKeys()
|
||||
{
|
||||
var k1 = new ApiKey("K1", "v1");
|
||||
var k2 = new ApiKey("K2", "v2");
|
||||
await _repository.AddApiKeyAsync(k1);
|
||||
await _repository.AddApiKeyAsync(k2);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var method = new ApiMethod("M", "return 1;") { ApprovedApiKeyIds = $"{k1.Id}, {k2.Id}" };
|
||||
await _repository.AddApiMethodAsync(method);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var keys = await _repository.GetApprovedKeysForMethodAsync(method.Id);
|
||||
|
||||
Assert.Equal(2, keys.Count);
|
||||
Assert.Empty(_logger.Warnings);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetApprovedKeysForMethod_WithMalformedCsvToken_LogsWarningAndDropsToken()
|
||||
{
|
||||
// Regression guard for ConfigurationDatabase-008: a corrupt token (a name where an
|
||||
// integer id is expected) must not be dropped silently — the corruption must be
|
||||
// observable via a logged warning, while the valid ids still resolve.
|
||||
var k1 = new ApiKey("K1", "v1");
|
||||
await _repository.AddApiKeyAsync(k1);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var method = new ApiMethod("M", "return 1;") { ApprovedApiKeyIds = $"{k1.Id},not-an-id" };
|
||||
await _repository.AddApiMethodAsync(method);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var keys = await _repository.GetApprovedKeysForMethodAsync(method.Id);
|
||||
|
||||
Assert.Single(keys);
|
||||
Assert.Equal(k1.Id, keys[0].Id);
|
||||
Assert.Single(_logger.Warnings);
|
||||
Assert.Contains("not-an-id", _logger.Warnings[0]);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetApprovedKeysForMethod_WithNullOrEmptyCsv_ReturnsEmptyWithoutWarning()
|
||||
{
|
||||
var method = new ApiMethod("M", "return 1;");
|
||||
await _repository.AddApiMethodAsync(method);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var keys = await _repository.GetApprovedKeysForMethodAsync(method.Id);
|
||||
|
||||
Assert.Empty(keys);
|
||||
Assert.Empty(_logger.Warnings);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteApiMethod_RemovesEntity()
|
||||
{
|
||||
var method = new ApiMethod("ToDelete", "return 1;");
|
||||
await _repository.AddApiMethodAsync(method);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
await _repository.DeleteApiMethodAsync(method.Id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetApiMethodByIdAsync(method.Id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new InboundApiRepository(null!));
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Minimal ILogger that captures warning-level messages for assertions.</summary>
|
||||
internal sealed class CapturingLogger<T> : ILogger<T>
|
||||
{
|
||||
public List<string> Warnings { get; } = new();
|
||||
|
||||
public IDisposable BeginScope<TState>(TState state) where TState : notnull => NullScope.Instance;
|
||||
|
||||
public bool IsEnabled(LogLevel logLevel) => true;
|
||||
|
||||
public void Log<TState>(LogLevel logLevel, EventId eventId, TState state, Exception? exception,
|
||||
Func<TState, Exception?, string> formatter)
|
||||
{
|
||||
if (logLevel == LogLevel.Warning)
|
||||
{
|
||||
Warnings.Add(formatter(state, exception));
|
||||
}
|
||||
}
|
||||
|
||||
private sealed class NullScope : IDisposable
|
||||
{
|
||||
public static readonly NullScope Instance = new();
|
||||
public void Dispose() { }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,366 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ScadaLink.Commons.Entities.Deployment;
|
||||
using ScadaLink.Commons.Entities.ExternalSystems;
|
||||
using ScadaLink.Commons.Entities.Instances;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Entities.Templates;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
using ScadaLink.ConfigurationDatabase.Services;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Tests;
|
||||
|
||||
// Regression coverage for ConfigurationDatabase-010 (repositories / InstanceLocator lacked
|
||||
// direct tests) and ConfigurationDatabase-011 (inconsistent constructor null-guarding).
|
||||
|
||||
public class ExternalSystemRepositoryTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly ExternalSystemRepository _repository;
|
||||
|
||||
public ExternalSystemRepositoryTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new ExternalSystemRepository(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddExternalSystem_AndGetById_RoundTrips()
|
||||
{
|
||||
var def = new ExternalSystemDefinition("Sys", "https://example.test", "ApiKey");
|
||||
await _repository.AddExternalSystemAsync(def);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetExternalSystemByIdAsync(def.Id);
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal("Sys", loaded!.Name);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetMethodsByExternalSystemId_FiltersByParent()
|
||||
{
|
||||
var def = new ExternalSystemDefinition("Sys", "https://example.test", "ApiKey");
|
||||
await _repository.AddExternalSystemAsync(def);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
await _repository.AddExternalSystemMethodAsync(
|
||||
new ExternalSystemMethod("M1", "GET", "/m1") { ExternalSystemDefinitionId = def.Id });
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var methods = await _repository.GetMethodsByExternalSystemIdAsync(def.Id);
|
||||
Assert.Single(methods);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteDatabaseConnection_RemovesEntity()
|
||||
{
|
||||
var conn = new DatabaseConnectionDefinition("Db", "Server=x;Database=y;");
|
||||
await _repository.AddDatabaseConnectionAsync(conn);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
await _repository.DeleteDatabaseConnectionAsync(conn.Id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetDatabaseConnectionByIdAsync(conn.Id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new ExternalSystemRepository(null!));
|
||||
}
|
||||
}
|
||||
|
||||
public class NotificationRepositoryTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly NotificationRepository _repository;
|
||||
|
||||
public NotificationRepositoryTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new NotificationRepository(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddNotificationList_WithRecipients_RoundTrips()
|
||||
{
|
||||
var list = new NotificationList("Ops");
|
||||
list.Recipients.Add(new NotificationRecipient("Ops Team", "ops@example.test"));
|
||||
await _repository.AddNotificationListAsync(list);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetListByNameAsync("Ops");
|
||||
Assert.NotNull(loaded);
|
||||
|
||||
var all = await _repository.GetAllNotificationListsAsync();
|
||||
Assert.Single(all);
|
||||
Assert.Single(all[0].Recipients);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddSmtpConfiguration_AndGetById_RoundTrips()
|
||||
{
|
||||
var smtp = new SmtpConfiguration("smtp.example.test", "Basic", "from@example.test");
|
||||
await _repository.AddSmtpConfigurationAsync(smtp);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetSmtpConfigurationByIdAsync(smtp.Id);
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal("smtp.example.test", loaded!.Host);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteNotificationList_RemovesEntity()
|
||||
{
|
||||
var list = new NotificationList("ToDelete");
|
||||
await _repository.AddNotificationListAsync(list);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
await _repository.DeleteNotificationListAsync(list.Id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetNotificationListByIdAsync(list.Id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new NotificationRepository(null!));
|
||||
}
|
||||
}
|
||||
|
||||
public class SiteRepositoryTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly SiteRepository _repository;
|
||||
|
||||
public SiteRepositoryTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new SiteRepository(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddSite_AndGetByIdentifier_RoundTrips()
|
||||
{
|
||||
var site = new Site("Site1", "S-001");
|
||||
await _repository.AddSiteAsync(site);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var loaded = await _repository.GetSiteByIdentifierAsync("S-001");
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal("Site1", loaded!.Name);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteSite_ViaStubAttachPath_RemovesEntity()
|
||||
{
|
||||
// Exercises the stub-attach delete fallback: the entity is not tracked because the
|
||||
// ChangeTracker is cleared, forcing the Local-miss branch in DeleteSiteAsync.
|
||||
var site = new Site("Site1", "S-001");
|
||||
await _repository.AddSiteAsync(site);
|
||||
await _repository.SaveChangesAsync();
|
||||
var id = site.Id;
|
||||
_context.ChangeTracker.Clear();
|
||||
|
||||
await _repository.DeleteSiteAsync(id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetSiteByIdAsync(id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteDataConnection_ViaStubAttachPath_RemovesEntity()
|
||||
{
|
||||
var site = new Site("Site1", "S-001");
|
||||
await _repository.AddSiteAsync(site);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var conn = new DataConnection("Conn1", "OpcUa", site.Id);
|
||||
await _repository.AddDataConnectionAsync(conn);
|
||||
await _repository.SaveChangesAsync();
|
||||
var id = conn.Id;
|
||||
_context.ChangeTracker.Clear();
|
||||
|
||||
await _repository.DeleteDataConnectionAsync(id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetDataConnectionByIdAsync(id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetInstancesBySiteId_FiltersBySite()
|
||||
{
|
||||
var site = new Site("Site1", "S-001");
|
||||
var template = new Template("T1");
|
||||
_context.Sites.Add(site);
|
||||
_context.Templates.Add(template);
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
_context.Instances.Add(new Instance("I1") { SiteId = site.Id, TemplateId = template.Id });
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
var instances = await _repository.GetInstancesBySiteIdAsync(site.Id);
|
||||
Assert.Single(instances);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new SiteRepository(null!));
|
||||
}
|
||||
}
|
||||
|
||||
public class DeploymentManagerRepositoryTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly DeploymentManagerRepository _repository;
|
||||
|
||||
public DeploymentManagerRepositoryTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new DeploymentManagerRepository(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
private async Task<Instance> SeedInstanceAsync()
|
||||
{
|
||||
var site = new Site("Site1", "S-001");
|
||||
var template = new Template("T1");
|
||||
_context.Sites.Add(site);
|
||||
_context.Templates.Add(template);
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
var instance = new Instance("Inst1") { SiteId = site.Id, TemplateId = template.Id };
|
||||
_context.Instances.Add(instance);
|
||||
await _context.SaveChangesAsync();
|
||||
return instance;
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task AddDeploymentRecord_AndGetCurrentStatus_ReturnsMostRecent()
|
||||
{
|
||||
var instance = await SeedInstanceAsync();
|
||||
|
||||
await _repository.AddDeploymentRecordAsync(
|
||||
new DeploymentRecord("d-001", "admin") { InstanceId = instance.Id, DeployedAt = DateTimeOffset.UtcNow.AddHours(-1) });
|
||||
await _repository.AddDeploymentRecordAsync(
|
||||
new DeploymentRecord("d-002", "admin") { InstanceId = instance.Id, DeployedAt = DateTimeOffset.UtcNow });
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
var current = await _repository.GetCurrentDeploymentStatusAsync(instance.Id);
|
||||
Assert.NotNull(current);
|
||||
Assert.Equal("d-002", current!.DeploymentId);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteDeploymentRecord_ViaStubAttachPath_RemovesEntity()
|
||||
{
|
||||
var instance = await SeedInstanceAsync();
|
||||
var record = new DeploymentRecord("d-001", "admin") { InstanceId = instance.Id, DeployedAt = DateTimeOffset.UtcNow };
|
||||
await _repository.AddDeploymentRecordAsync(record);
|
||||
await _repository.SaveChangesAsync();
|
||||
var id = record.Id;
|
||||
_context.ChangeTracker.Clear();
|
||||
|
||||
await _repository.DeleteDeploymentRecordAsync(id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetDeploymentRecordByIdAsync(id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DeleteInstance_RemovesRestrictFkDeploymentRecordsFirst()
|
||||
{
|
||||
// DeploymentRecord has a Restrict FK to Instance; DeleteInstanceAsync must remove
|
||||
// the dependent deployment records explicitly or the delete would fail.
|
||||
var instance = await SeedInstanceAsync();
|
||||
await _repository.AddDeploymentRecordAsync(
|
||||
new DeploymentRecord("d-001", "admin") { InstanceId = instance.Id, DeployedAt = DateTimeOffset.UtcNow });
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
await _repository.DeleteInstanceAsync(instance.Id);
|
||||
await _repository.SaveChangesAsync();
|
||||
|
||||
Assert.Null(await _repository.GetInstanceByIdAsync(instance.Id));
|
||||
Assert.Empty(await _repository.GetDeploymentsByInstanceIdAsync(instance.Id));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new DeploymentManagerRepository(null!));
|
||||
}
|
||||
}
|
||||
|
||||
public class InstanceLocatorTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly InstanceLocator _locator;
|
||||
|
||||
public InstanceLocatorTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_locator = new InstanceLocator(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetSiteIdForInstance_WhenFound_ReturnsSiteIdentifier()
|
||||
{
|
||||
var site = new Site("Site1", "SITE-001");
|
||||
var template = new Template("T1");
|
||||
_context.Sites.Add(site);
|
||||
_context.Templates.Add(template);
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
_context.Instances.Add(new Instance("Pump1") { SiteId = site.Id, TemplateId = template.Id });
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
var result = await _locator.GetSiteIdForInstanceAsync("Pump1");
|
||||
Assert.Equal("SITE-001", result);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetSiteIdForInstance_WhenInstanceNotFound_ReturnsNull()
|
||||
{
|
||||
var result = await _locator.GetSiteIdForInstanceAsync("DoesNotExist");
|
||||
Assert.Null(result);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullContext_Throws()
|
||||
{
|
||||
Assert.Throws<ArgumentNullException>(() => new InstanceLocator(null!));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Entities.Templates;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Tests;
|
||||
|
||||
public class SchemaConfigurationTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
|
||||
public SchemaConfigurationTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
// ConfigurationDatabase-006: the gRPC node-address columns must be length-bounded
|
||||
// (HasMaxLength(500)) consistently with the sibling NodeAAddress/NodeBAddress columns,
|
||||
// rather than being left to map to nvarchar(max).
|
||||
|
||||
[Theory]
|
||||
[InlineData(nameof(Site.GrpcNodeAAddress))]
|
||||
[InlineData(nameof(Site.GrpcNodeBAddress))]
|
||||
public void GrpcNodeAddressColumns_AreLengthBoundedTo500(string propertyName)
|
||||
{
|
||||
var property = _context.Model
|
||||
.FindEntityType(typeof(Site))!
|
||||
.FindProperty(propertyName)!;
|
||||
|
||||
Assert.Equal(500, property.GetMaxLength());
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData(nameof(Site.NodeAAddress))]
|
||||
[InlineData(nameof(Site.NodeBAddress))]
|
||||
public void GrpcNodeAddressColumns_MatchSiblingNodeAddressBounds(string siblingPropertyName)
|
||||
{
|
||||
var entity = _context.Model.FindEntityType(typeof(Site))!;
|
||||
var siblingMaxLength = entity.FindProperty(siblingPropertyName)!.GetMaxLength();
|
||||
|
||||
Assert.Equal(siblingMaxLength, entity.FindProperty(nameof(Site.GrpcNodeAAddress))!.GetMaxLength());
|
||||
Assert.Equal(siblingMaxLength, entity.FindProperty(nameof(Site.GrpcNodeBAddress))!.GetMaxLength());
|
||||
}
|
||||
}
|
||||
|
||||
public class SplitQueryBehaviourTests : IDisposable
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly TemplateEngineRepository _repository;
|
||||
|
||||
public SplitQueryBehaviourTests()
|
||||
{
|
||||
_context = SqliteTestHelper.CreateInMemoryContext();
|
||||
_repository = new TemplateEngineRepository(_context);
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_context.Database.CloseConnection();
|
||||
_context.Dispose();
|
||||
}
|
||||
|
||||
// ConfigurationDatabase-009: the multi-collection eager-load queries were switched to
|
||||
// AsSplitQuery() to avoid cartesian-product joins. The result set must be unchanged —
|
||||
// every member collection still fully populated, with no row duplication.
|
||||
|
||||
[Fact]
|
||||
public async Task GetAllTemplatesAsync_WithMultipleMembersPerCollection_LoadsAllWithoutDuplication()
|
||||
{
|
||||
var template = new Template("MultiMember");
|
||||
for (int i = 0; i < 3; i++)
|
||||
template.Attributes.Add(new TemplateAttribute($"Attr{i}"));
|
||||
for (int i = 0; i < 2; i++)
|
||||
template.Alarms.Add(new TemplateAlarm($"Alarm{i}"));
|
||||
for (int i = 0; i < 4; i++)
|
||||
template.Scripts.Add(new TemplateScript($"Script{i}", "return 1;"));
|
||||
_context.Templates.Add(template);
|
||||
await _context.SaveChangesAsync();
|
||||
_context.ChangeTracker.Clear();
|
||||
|
||||
var all = await _repository.GetAllTemplatesAsync();
|
||||
|
||||
var loaded = Assert.Single(all);
|
||||
// A cartesian-product single query would yield 3 x 2 x 4 = 24 joined rows; the
|
||||
// collections must still contain exactly the inserted counts.
|
||||
Assert.Equal(3, loaded.Attributes.Count);
|
||||
Assert.Equal(2, loaded.Alarms.Count);
|
||||
Assert.Equal(4, loaded.Scripts.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task GetTemplateByIdAsync_WithMultipleMembers_LoadsAllCollections()
|
||||
{
|
||||
var template = new Template("Single");
|
||||
template.Attributes.Add(new TemplateAttribute("A1"));
|
||||
template.Attributes.Add(new TemplateAttribute("A2"));
|
||||
template.Scripts.Add(new TemplateScript("S1", "return 1;"));
|
||||
_context.Templates.Add(template);
|
||||
await _context.SaveChangesAsync();
|
||||
_context.ChangeTracker.Clear();
|
||||
|
||||
var loaded = await _repository.GetTemplateByIdAsync(template.Id);
|
||||
|
||||
Assert.NotNull(loaded);
|
||||
Assert.Equal(2, loaded!.Attributes.Count);
|
||||
Assert.Single(loaded.Scripts);
|
||||
}
|
||||
}
|
||||
@@ -836,4 +836,63 @@ public class DataConnectionActorTests : TestKit
|
||||
Assert.Equal(5, report.TotalSubscribedTags); // all 5 tags tracked
|
||||
Assert.Equal(3, report.ResolvedTags); // only the 3 good ones resolved
|
||||
}
|
||||
|
||||
// ── DataConnectionLayer-008: HandleUnsubscribe shared-tag reference counting ──
|
||||
|
||||
[Fact]
|
||||
public async Task DCL008_Unsubscribe_OnlyReleasesTagWhenLastSubscriberLeaves()
|
||||
{
|
||||
// Regression test for DataConnectionLayer-008. HandleUnsubscribe must release a
|
||||
// tag at the adapter only when no other instance still subscribes to it. The
|
||||
// O(n) per-tag scan over every instance was replaced with an O(1) reference
|
||||
// count; this guards that the reference count tracks shared subscriptions
|
||||
// correctly — a shared tag is kept while any subscriber remains and the
|
||||
// resolved-tag counter and adapter UnsubscribeAsync stay consistent.
|
||||
var unsubscribed = new System.Collections.Concurrent.ConcurrentBag<string>();
|
||||
_mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
|
||||
.Returns(Task.CompletedTask);
|
||||
_mockAdapter.Status.Returns(ConnectionHealth.Connected);
|
||||
_mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
|
||||
.Returns(ci => Task.FromResult("sub-" + (string)ci[0]));
|
||||
_mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
|
||||
.Returns(new ReadResult(false, null, null));
|
||||
_mockAdapter.UnsubscribeAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
|
||||
.Returns(ci => { unsubscribed.Add((string)ci[0]); return Task.CompletedTask; });
|
||||
|
||||
var actor = CreateConnectionActor("dcl008-shared");
|
||||
await Task.Delay(300);
|
||||
|
||||
// Two instances both subscribe to the shared tag; instA also has an exclusive tag.
|
||||
actor.Tell(new SubscribeTagsRequest("c1", "instA", "dcl008-shared",
|
||||
["shared/tag", "exclusive/a"], DateTimeOffset.UtcNow));
|
||||
ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
|
||||
actor.Tell(new SubscribeTagsRequest("c2", "instB", "dcl008-shared",
|
||||
["shared/tag"], DateTimeOffset.UtcNow));
|
||||
ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
|
||||
|
||||
// Unsubscribe instA — shared/tag must stay (instB still subscribes); only
|
||||
// exclusive/a is released at the adapter.
|
||||
actor.Tell(new UnsubscribeTagsRequest("c3", "instA", "dcl008-shared", DateTimeOffset.UtcNow));
|
||||
await Task.Delay(300);
|
||||
|
||||
Assert.Contains("sub-exclusive/a", unsubscribed);
|
||||
Assert.DoesNotContain("sub-shared/tag", unsubscribed);
|
||||
|
||||
// Health: 1 tag still subscribed and resolved (shared/tag held by instB).
|
||||
actor.Tell(new DataConnectionActor.GetHealthReport());
|
||||
var report1 = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));
|
||||
Assert.Equal(1, report1.TotalSubscribedTags);
|
||||
Assert.Equal(1, report1.ResolvedTags);
|
||||
|
||||
// Unsubscribe instB — now shared/tag has no subscribers and is released.
|
||||
actor.Tell(new UnsubscribeTagsRequest("c4", "instB", "dcl008-shared", DateTimeOffset.UtcNow));
|
||||
await Task.Delay(300);
|
||||
|
||||
Assert.Contains("sub-shared/tag", unsubscribed);
|
||||
|
||||
actor.Tell(new DataConnectionActor.GetHealthReport());
|
||||
var report2 = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));
|
||||
Assert.Equal(0, report2.TotalSubscribedTags);
|
||||
Assert.Equal(0, report2.ResolvedTags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,6 +116,45 @@ public class OpcUaDataConnectionTests
|
||||
Assert.Equal(ConnectionHealth.Disconnected, _adapter.Status);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task DCL013_ConcurrentConnectionLost_RaisesDisconnectedExactlyOnce()
|
||||
{
|
||||
// Regression test for DataConnectionLayer-013. RaiseDisconnected used a
|
||||
// non-atomic check-then-set on a volatile bool: two threads racing through it
|
||||
// (e.g. the keep-alive thread and a ReadAsync failure path, both routed via
|
||||
// OnClientConnectionLost) could both observe _disconnectFired == false and both
|
||||
// invoke Disconnected. The guard is now an atomic Interlocked.Exchange, so a
|
||||
// burst of concurrent connection-lost callbacks fires the event exactly once.
|
||||
// Repeat the burst: reconnecting between rounds re-arms the guard, so each
|
||||
// round must independently fire Disconnected exactly once. Repetition makes
|
||||
// the (timing-dependent) non-atomic race overwhelmingly likely to be caught.
|
||||
const int rounds = 25;
|
||||
const int threads = 32;
|
||||
for (var round = 0; round < rounds; round++)
|
||||
{
|
||||
_mockClient.IsConnected.Returns(true);
|
||||
await _adapter.ConnectAsync(new Dictionary<string, string>());
|
||||
|
||||
var fired = 0;
|
||||
void Handler() => Interlocked.Increment(ref fired);
|
||||
_adapter.Disconnected += Handler;
|
||||
|
||||
// Fan out: many threads raise the client's ConnectionLost event together.
|
||||
using (var ready = new Barrier(threads))
|
||||
{
|
||||
var tasks = Enumerable.Range(0, threads).Select(_ => Task.Run(() =>
|
||||
{
|
||||
ready.SignalAndWait();
|
||||
_mockClient.ConnectionLost += Raise.Event<Action>();
|
||||
})).ToArray();
|
||||
await Task.WhenAll(tasks);
|
||||
}
|
||||
|
||||
_adapter.Disconnected -= Handler;
|
||||
Assert.Equal(1, fired);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Subscribe_DelegatesAndReturnsId()
|
||||
{
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Text.Json;
|
||||
using Akka.Actor;
|
||||
using Akka.TestKit.Xunit2;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.Deployment;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
@@ -12,7 +17,7 @@ namespace ScadaLink.DeploymentManager.Tests;
|
||||
/// <summary>
|
||||
/// WP-7: Tests for system-wide artifact deployment.
|
||||
/// </summary>
|
||||
public class ArtifactDeploymentServiceTests
|
||||
public class ArtifactDeploymentServiceTests : TestKit
|
||||
{
|
||||
private readonly ISiteRepository _siteRepo;
|
||||
private readonly IDeploymentManagerRepository _deploymentRepo;
|
||||
@@ -70,6 +75,86 @@ public class ArtifactDeploymentServiceTests
|
||||
Assert.Equal(3, summary.SiteResults.Count);
|
||||
}
|
||||
|
||||
// ── DeploymentManager-010: one logical deployment id across all per-site commands ──
|
||||
|
||||
[Fact]
|
||||
public async Task DeployToAllSitesAsync_AllPerSiteCommandsShareTheSummaryDeploymentId()
|
||||
{
|
||||
// DeploymentManager-010: previously each per-site DeployArtifactsCommand
|
||||
// minted its own GUID, so one logical deployment produced N+1 unrelated
|
||||
// ids. Every per-site command must now carry the SAME id, equal to the
|
||||
// id reported in the summary and audit log.
|
||||
var sites = new List<Site>
|
||||
{
|
||||
new("Site One", "site-1") { Id = 1 },
|
||||
new("Site Two", "site-2") { Id = 2 }
|
||||
};
|
||||
_siteRepo.GetAllSitesAsync(Arg.Any<CancellationToken>()).Returns(sites);
|
||||
var probe = Sys.ActorOf(Props.Create(() => new ArtifactProbeActor()));
|
||||
var service = CreateServiceWithCommActor(probe);
|
||||
|
||||
var result = await service.DeployToAllSitesAsync("admin");
|
||||
|
||||
Assert.True(result.IsSuccess);
|
||||
var commands = ArtifactProbeActor.Received;
|
||||
Assert.Equal(2, commands.Count);
|
||||
|
||||
// All per-site commands carry one shared id, equal to the summary id.
|
||||
var distinctIds = commands.Select(c => c.DeploymentId).Distinct().ToList();
|
||||
Assert.Single(distinctIds);
|
||||
Assert.Equal(result.Value.DeploymentId, distinctIds[0]);
|
||||
|
||||
// The persisted record embeds the same logical deployment id.
|
||||
await _deploymentRepo.Received().AddSystemArtifactDeploymentAsync(
|
||||
Arg.Do<SystemArtifactDeploymentRecord>(r =>
|
||||
{
|
||||
using var doc = JsonDocument.Parse(r.PerSiteStatus!);
|
||||
Assert.Equal(result.Value.DeploymentId,
|
||||
doc.RootElement.GetProperty("DeploymentId").GetString());
|
||||
}),
|
||||
Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
// ── DeploymentManager-014: real per-site success/failure coverage ──
|
||||
|
||||
[Fact]
|
||||
public async Task DeployToAllSitesAsync_PartialFailure_ReportsPerSiteMatrix()
|
||||
{
|
||||
// Site one succeeds, site two fails -> the summary counts must reflect
|
||||
// the per-site matrix.
|
||||
var sites = new List<Site>
|
||||
{
|
||||
new("Site One", "ok-site") { Id = 1 },
|
||||
new("Site Two", "fail-site") { Id = 2 }
|
||||
};
|
||||
_siteRepo.GetAllSitesAsync(Arg.Any<CancellationToken>()).Returns(sites);
|
||||
var probe = Sys.ActorOf(Props.Create(() => new ArtifactProbeActor("fail-site")));
|
||||
var service = CreateServiceWithCommActor(probe);
|
||||
|
||||
var result = await service.DeployToAllSitesAsync("admin");
|
||||
|
||||
Assert.True(result.IsSuccess);
|
||||
Assert.Equal(1, result.Value.SuccessCount);
|
||||
Assert.Equal(1, result.Value.FailureCount);
|
||||
Assert.Contains(result.Value.SiteResults, r => r.SiteId == "ok-site" && r.Success);
|
||||
Assert.Contains(result.Value.SiteResults, r => r.SiteId == "fail-site" && !r.Success);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task RetryForSiteAsync_SiteSucceeds_ReturnsSuccessAndAudits()
|
||||
{
|
||||
var probe = Sys.ActorOf(Props.Create(() => new ArtifactProbeActor()));
|
||||
var service = CreateServiceWithCommActor(probe);
|
||||
|
||||
var result = await service.RetryForSiteAsync(1, "retry-site", "admin");
|
||||
|
||||
Assert.True(result.IsSuccess);
|
||||
Assert.Equal("retry-site", result.Value.SiteId);
|
||||
await _audit.Received().LogAsync(
|
||||
"admin", "RetryArtifactDeployment", "SystemArtifact",
|
||||
Arg.Any<string>(), "retry-site", Arg.Any<object>(), Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
private ArtifactDeploymentService CreateService()
|
||||
{
|
||||
var comms = new CommunicationService(
|
||||
@@ -83,9 +168,51 @@ public class ArtifactDeploymentServiceTests
|
||||
NullLogger<ArtifactDeploymentService>.Instance);
|
||||
}
|
||||
|
||||
private static DeployArtifactsCommand CreateCommand()
|
||||
private ArtifactDeploymentService CreateServiceWithCommActor(IActorRef commActor)
|
||||
{
|
||||
return new DeployArtifactsCommand(
|
||||
"dep1", null, null, null, null, null, null, DateTimeOffset.UtcNow);
|
||||
var comms = new CommunicationService(
|
||||
Options.Create(new CommunicationOptions
|
||||
{
|
||||
ArtifactDeploymentTimeout = TimeSpan.FromSeconds(5)
|
||||
}),
|
||||
NullLogger<CommunicationService>.Instance);
|
||||
comms.SetCommunicationActor(commActor);
|
||||
|
||||
return new ArtifactDeploymentService(
|
||||
_siteRepo, _deploymentRepo, _templateRepo, _externalSystemRepo, _notificationRepo,
|
||||
comms, _audit,
|
||||
Options.Create(new DeploymentManagerOptions
|
||||
{
|
||||
ArtifactDeploymentTimeoutPerSite = TimeSpan.FromSeconds(5)
|
||||
}),
|
||||
NullLogger<ArtifactDeploymentService>.Instance);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Stand-in CentralCommunicationActor for artifact deployment. Records every
|
||||
/// <see cref="DeployArtifactsCommand"/> it receives and replies success
|
||||
/// unless the target site id is in the configured failure set.
|
||||
/// </summary>
|
||||
private class ArtifactProbeActor : ReceiveActor
|
||||
{
|
||||
public static readonly ConcurrentBag<DeployArtifactsCommand> Received = new();
|
||||
|
||||
public ArtifactProbeActor(params string[] failingSites)
|
||||
{
|
||||
Received.Clear();
|
||||
var failSet = new HashSet<string>(failingSites);
|
||||
|
||||
Receive<SiteEnvelope>(env =>
|
||||
{
|
||||
if (env.Message is DeployArtifactsCommand cmd)
|
||||
{
|
||||
Received.Add(cmd);
|
||||
var success = !failSet.Contains(env.SiteId);
|
||||
Sender.Tell(new ArtifactDeploymentResponse(
|
||||
cmd.DeploymentId, env.SiteId, success,
|
||||
success ? null : "site rejected artifacts", DateTimeOffset.UtcNow));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -763,6 +763,59 @@ public class DeploymentServiceTests : TestKit
|
||||
Assert.Equal(1, ReconcileProbeActor.DeployCount);
|
||||
}
|
||||
|
||||
// ── DeploymentManager-012: LifecycleCommandTimeout must actually bound lifecycle commands ──
|
||||
|
||||
[Fact]
|
||||
public async Task DisableInstanceAsync_SiteUnresponsive_LifecycleCommandTimeoutBoundsTheWait()
|
||||
{
|
||||
// The site never replies to the DisableInstanceCommand. A short
|
||||
// LifecycleCommandTimeout must abort the wait quickly -- if the option
|
||||
// is dead code the call would instead hang until CommunicationOptions
|
||||
// .LifecycleTimeout (much longer) elapses.
|
||||
var instance = new Instance("StuckInst") { Id = 60, SiteId = 1, State = InstanceState.Enabled };
|
||||
_repo.GetInstanceByIdAsync(60, Arg.Any<CancellationToken>()).Returns(instance);
|
||||
|
||||
// Probe drops every message -> no reply ever arrives.
|
||||
var commActor = Sys.ActorOf(Props.Create(() => new SilentProbeActor()));
|
||||
|
||||
var comms = new CommunicationService(
|
||||
Options.Create(new CommunicationOptions
|
||||
{
|
||||
// Long communication-layer timeout: if LifecycleCommandTimeout
|
||||
// were dead, the test would wait this long.
|
||||
LifecycleTimeout = TimeSpan.FromSeconds(30)
|
||||
}),
|
||||
NullLogger<CommunicationService>.Instance);
|
||||
comms.SetCommunicationActor(commActor);
|
||||
|
||||
var siteRepo = Substitute.For<ISiteRepository>();
|
||||
var service = new DeploymentService(
|
||||
_repo, siteRepo, _pipeline, comms, _lockManager, _audit,
|
||||
new DiffService(),
|
||||
Options.Create(new DeploymentManagerOptions
|
||||
{
|
||||
OperationLockTimeout = TimeSpan.FromSeconds(5),
|
||||
LifecycleCommandTimeout = TimeSpan.FromMilliseconds(300)
|
||||
}),
|
||||
NullLogger<DeploymentService>.Instance);
|
||||
|
||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||
var result = await service.DisableInstanceAsync(60, "admin");
|
||||
sw.Stop();
|
||||
|
||||
Assert.True(result.IsFailure);
|
||||
// The 300ms LifecycleCommandTimeout bounded the wait well under the
|
||||
// 30s communication-layer timeout.
|
||||
Assert.True(sw.Elapsed < TimeSpan.FromSeconds(10),
|
||||
$"Lifecycle command was not bounded by LifecycleCommandTimeout (took {sw.Elapsed}).");
|
||||
}
|
||||
|
||||
/// <summary>Stand-in actor that never replies to anything.</summary>
|
||||
private class SilentProbeActor : ReceiveActor
|
||||
{
|
||||
public SilentProbeActor() => ReceiveAny(_ => { });
|
||||
}
|
||||
|
||||
// ── DeploymentManager-003: post-success persistence must commit the Success status ──
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -56,6 +56,98 @@ public class DatabaseGatewayTests
|
||||
() => gateway.CachedWriteAsync("nonexistent", "INSERT INTO t VALUES (1)"));
|
||||
}
|
||||
|
||||
// ── ExternalSystemGateway-014: CachedWrite happy-path buffering ──
|
||||
|
||||
[Fact]
|
||||
public async Task CachedWrite_BuffersTheWriteWithConnectionRetrySettings()
|
||||
{
|
||||
var conn = new DatabaseConnectionDefinition("testDb", "Server=localhost;Database=test")
|
||||
{
|
||||
Id = 1,
|
||||
MaxRetries = 5,
|
||||
RetryDelay = TimeSpan.FromSeconds(12),
|
||||
};
|
||||
_repository.GetAllDatabaseConnectionsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<DatabaseConnectionDefinition> { conn });
|
||||
|
||||
var dbName = $"EsgCachedWrite_{Guid.NewGuid():N}";
|
||||
var connStr = $"Data Source={dbName};Mode=Memory;Cache=Shared";
|
||||
using var keepAlive = new Microsoft.Data.Sqlite.SqliteConnection(connStr);
|
||||
keepAlive.Open();
|
||||
var storage = new ScadaLink.StoreAndForward.StoreAndForwardStorage(
|
||||
connStr, NullLogger<ScadaLink.StoreAndForward.StoreAndForwardStorage>.Instance);
|
||||
await storage.InitializeAsync();
|
||||
var sfOptions = new ScadaLink.StoreAndForward.StoreAndForwardOptions
|
||||
{
|
||||
DefaultMaxRetries = 99,
|
||||
DefaultRetryInterval = TimeSpan.FromMinutes(10),
|
||||
RetryTimerInterval = TimeSpan.FromMinutes(10),
|
||||
};
|
||||
var sf = new ScadaLink.StoreAndForward.StoreAndForwardService(
|
||||
storage, sfOptions, NullLogger<ScadaLink.StoreAndForward.StoreAndForwardService>.Instance);
|
||||
|
||||
var gateway = new DatabaseGateway(_repository, NullLogger<DatabaseGateway>.Instance, storeAndForward: sf);
|
||||
|
||||
await gateway.CachedWriteAsync("testDb", "INSERT INTO t VALUES (@v)",
|
||||
new Dictionary<string, object?> { ["v"] = 1 });
|
||||
|
||||
var depth = await storage.GetBufferDepthByCategoryAsync();
|
||||
Assert.Equal(1, depth[ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.CachedDbWrite]);
|
||||
|
||||
var (maxRetries, retryIntervalMs) = ReadBufferedRetrySettings(connStr);
|
||||
Assert.Equal(5, maxRetries);
|
||||
Assert.Equal((long)TimeSpan.FromSeconds(12).TotalMilliseconds, retryIntervalMs);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CachedWrite_ZeroMaxRetriesIsHonouredNotTreatedAsUnset()
|
||||
{
|
||||
var conn = new DatabaseConnectionDefinition("testDb", "Server=localhost;Database=test")
|
||||
{
|
||||
Id = 1,
|
||||
MaxRetries = 0,
|
||||
RetryDelay = TimeSpan.FromSeconds(3),
|
||||
};
|
||||
_repository.GetAllDatabaseConnectionsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<DatabaseConnectionDefinition> { conn });
|
||||
|
||||
var dbName = $"EsgCachedWriteZero_{Guid.NewGuid():N}";
|
||||
var connStr = $"Data Source={dbName};Mode=Memory;Cache=Shared";
|
||||
using var keepAlive = new Microsoft.Data.Sqlite.SqliteConnection(connStr);
|
||||
keepAlive.Open();
|
||||
var storage = new ScadaLink.StoreAndForward.StoreAndForwardStorage(
|
||||
connStr, NullLogger<ScadaLink.StoreAndForward.StoreAndForwardStorage>.Instance);
|
||||
await storage.InitializeAsync();
|
||||
var sfOptions = new ScadaLink.StoreAndForward.StoreAndForwardOptions
|
||||
{
|
||||
DefaultMaxRetries = 99,
|
||||
DefaultRetryInterval = TimeSpan.FromMinutes(10),
|
||||
RetryTimerInterval = TimeSpan.FromMinutes(10),
|
||||
};
|
||||
var sf = new ScadaLink.StoreAndForward.StoreAndForwardService(
|
||||
storage, sfOptions, NullLogger<ScadaLink.StoreAndForward.StoreAndForwardService>.Instance);
|
||||
|
||||
var gateway = new DatabaseGateway(_repository, NullLogger<DatabaseGateway>.Instance, storeAndForward: sf);
|
||||
|
||||
await gateway.CachedWriteAsync("testDb", "INSERT INTO t VALUES (1)");
|
||||
|
||||
var (maxRetries, _) = ReadBufferedRetrySettings(connStr);
|
||||
Assert.Equal(0, maxRetries); // honoured — not the S&F default of 99
|
||||
}
|
||||
|
||||
private static (int MaxRetries, long RetryIntervalMs) ReadBufferedRetrySettings(string connStr)
|
||||
{
|
||||
using var conn = new Microsoft.Data.Sqlite.SqliteConnection(connStr);
|
||||
conn.Open();
|
||||
using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = "SELECT max_retries, retry_interval_ms FROM sf_messages";
|
||||
using var reader = cmd.ExecuteReader();
|
||||
Assert.True(reader.Read(), "expected exactly one buffered message");
|
||||
var result = (reader.GetInt32(0), reader.GetInt64(1));
|
||||
Assert.False(reader.Read(), "expected exactly one buffered message");
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── ExternalSystemGateway-001: buffered CachedDbWrite delivery handler ──
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
using System.Net;
|
||||
using System.Net.Http.Headers;
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.ExternalSystems;
|
||||
@@ -603,6 +605,237 @@ public class ExternalSystemClientTests
|
||||
"A caller-cancelled CachedCall must not be buffered for retry");
|
||||
}
|
||||
|
||||
// ── ExternalSystemGateway-014: BuildUrl query-string, ApplyAuth, connection errors ──
|
||||
|
||||
[Fact]
|
||||
public async Task Call_GetWithParameters_AppendsEscapedQueryString()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||
var method = new ExternalSystemMethod("search", "GET", "/search") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new RequestCapturingHandler(HttpStatusCode.OK, "{}");
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
await client.CallAsync("TestAPI", "search", new Dictionary<string, object?>
|
||||
{
|
||||
["q"] = "a b&c",
|
||||
["page"] = 2,
|
||||
});
|
||||
|
||||
// AbsoluteUri preserves percent-encoding; the '&' inside a value must be
|
||||
// escaped so it is not mistaken for a parameter separator.
|
||||
var uri = handler.LastUri!.AbsoluteUri;
|
||||
Assert.StartsWith("https://api.example.com/search?", uri);
|
||||
Assert.Contains("q=a%20b%26c", uri);
|
||||
Assert.Contains("page=2", uri);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_PostWithParameters_SendsJsonBody()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||
var method = new ExternalSystemMethod("create", "POST", "/create") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new RequestCapturingHandler(HttpStatusCode.OK, "{}");
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
await client.CallAsync("TestAPI", "create", new Dictionary<string, object?> { ["name"] = "widget" });
|
||||
|
||||
Assert.Equal("https://api.example.com/create", handler.LastUri!.ToString());
|
||||
Assert.Contains("\"name\":\"widget\"", handler.LastBody);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_ApiKeyAuthWithDefaultHeader_SendsXApiKeyHeader()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "apikey")
|
||||
{
|
||||
Id = 1,
|
||||
AuthConfiguration = "secret-key-123",
|
||||
};
|
||||
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new RequestCapturingHandler(HttpStatusCode.OK, "{}");
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
await client.CallAsync("TestAPI", "getData");
|
||||
|
||||
Assert.True(handler.LastHeaders!.TryGetValues("X-API-Key", out var values));
|
||||
Assert.Equal("secret-key-123", values!.Single());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_ApiKeyAuthWithCustomHeader_SendsNamedHeader()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "apikey")
|
||||
{
|
||||
Id = 1,
|
||||
AuthConfiguration = "Authorization-Token:abc",
|
||||
};
|
||||
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new RequestCapturingHandler(HttpStatusCode.OK, "{}");
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
await client.CallAsync("TestAPI", "getData");
|
||||
|
||||
Assert.True(handler.LastHeaders!.TryGetValues("Authorization-Token", out var values));
|
||||
Assert.Equal("abc", values!.Single());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_BasicAuth_SendsBase64AuthorizationHeader()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "basic")
|
||||
{
|
||||
Id = 1,
|
||||
AuthConfiguration = "alice:s3cret",
|
||||
};
|
||||
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new RequestCapturingHandler(HttpStatusCode.OK, "{}");
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
await client.CallAsync("TestAPI", "getData");
|
||||
|
||||
var auth = handler.LastHeaders!.Authorization;
|
||||
Assert.NotNull(auth);
|
||||
Assert.Equal("Basic", auth!.Scheme);
|
||||
var decoded = System.Text.Encoding.UTF8.GetString(Convert.FromBase64String(auth.Parameter!));
|
||||
Assert.Equal("alice:s3cret", decoded);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_ConnectionError_IsClassifiedAsTransient()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
// A connection-level failure (e.g. host unreachable) surfaces as HttpRequestException.
|
||||
var handler = new ThrowingHttpMessageHandler(new HttpRequestException("connection refused"));
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(new HttpClient(handler));
|
||||
|
||||
var client = new ExternalSystemClient(
|
||||
_httpClientFactory, _repository, NullLogger<ExternalSystemClient>.Instance);
|
||||
|
||||
var result = await client.CallAsync("TestAPI", "getData");
|
||||
|
||||
Assert.False(result.Success);
|
||||
Assert.Contains("Transient error", result.ErrorMessage);
|
||||
}
|
||||
|
||||
// ── ExternalSystemGateway-012: permanent failures must be logged ──
|
||||
|
||||
[Fact]
|
||||
public async Task Call_PermanentFailure_LogsAWarning()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||
var method = new ExternalSystemMethod("badMethod", "POST", "/bad") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new MockHttpMessageHandler(HttpStatusCode.BadRequest, "bad request");
|
||||
var httpClient = new HttpClient(handler);
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(httpClient);
|
||||
|
||||
var logger = new CapturingLogger<ExternalSystemClient>();
|
||||
var client = new ExternalSystemClient(_httpClientFactory, _repository, logger);
|
||||
|
||||
await client.CallAsync("TestAPI", "badMethod");
|
||||
|
||||
// The design doc requires permanent failures to be surfaced to Site Event
|
||||
// Logging — the gateway must emit at least a warning, not stay silent.
|
||||
Assert.Contains(logger.Entries, e =>
|
||||
e.Level >= LogLevel.Warning && e.Message.Contains("TestAPI"));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Call_TransientFailure_DoesNotLogAtWarningOrAbove()
|
||||
{
|
||||
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||
var method = new ExternalSystemMethod("failMethod", "POST", "/fail") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemDefinition> { system });
|
||||
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||
.Returns(new List<ExternalSystemMethod> { method });
|
||||
|
||||
var handler = new MockHttpMessageHandler(HttpStatusCode.InternalServerError, "boom");
|
||||
var httpClient = new HttpClient(handler);
|
||||
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(httpClient);
|
||||
|
||||
var logger = new CapturingLogger<ExternalSystemClient>();
|
||||
var client = new ExternalSystemClient(_httpClientFactory, _repository, logger);
|
||||
|
||||
await client.CallAsync("TestAPI", "failMethod");
|
||||
|
||||
// A transient failure is normal operation handled by retry/S&F — it must not
|
||||
// be logged at warning level (only permanent failures are).
|
||||
Assert.DoesNotContain(logger.Entries, e => e.Level >= LogLevel.Warning);
|
||||
}
|
||||
|
||||
/// <summary>Test helper: an ILogger that records every entry for assertions.</summary>
|
||||
private sealed class CapturingLogger<T> : ILogger<T>
|
||||
{
|
||||
public List<(LogLevel Level, string Message)> Entries { get; } = new();
|
||||
|
||||
public IDisposable BeginScope<TState>(TState state) where TState : notnull => NullScope.Instance;
|
||||
public bool IsEnabled(LogLevel logLevel) => true;
|
||||
|
||||
public void Log<TState>(
|
||||
LogLevel logLevel, EventId eventId, TState state, Exception? exception,
|
||||
Func<TState, Exception?, string> formatter)
|
||||
{
|
||||
Entries.Add((logLevel, formatter(state, exception)));
|
||||
}
|
||||
|
||||
private sealed class NullScope : IDisposable
|
||||
{
|
||||
public static readonly NullScope Instance = new();
|
||||
public void Dispose() { }
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test helper: mock HTTP message handler.
|
||||
/// </summary>
|
||||
@@ -667,7 +900,7 @@ public class ExternalSystemClientTests
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test helper: captures the request URI of the last request.
|
||||
/// Test helper: captures the URI, headers and body of the last request.
|
||||
/// </summary>
|
||||
private class RequestCapturingHandler : HttpMessageHandler
|
||||
{
|
||||
@@ -681,17 +914,31 @@ public class ExternalSystemClientTests
|
||||
}
|
||||
|
||||
public Uri? LastUri { get; private set; }
|
||||
public HttpRequestHeaders? LastHeaders { get; private set; }
|
||||
public string? LastBody { get; private set; }
|
||||
|
||||
protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
{
|
||||
LastUri = request.RequestUri;
|
||||
return Task.FromResult(new HttpResponseMessage(_statusCode)
|
||||
LastHeaders = request.Headers;
|
||||
LastBody = request.Content == null ? null : await request.Content.ReadAsStringAsync(cancellationToken);
|
||||
return new HttpResponseMessage(_statusCode)
|
||||
{
|
||||
Content = new StringContent(_body)
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Test helper: an HTTP handler that throws a connection-level exception.</summary>
|
||||
private class ThrowingHttpMessageHandler : HttpMessageHandler
|
||||
{
|
||||
private readonly Exception _exception;
|
||||
public ThrowingHttpMessageHandler(Exception exception) => _exception = exception;
|
||||
|
||||
protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
|
||||
=> throw _exception;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test helper: an HTTP handler that hangs until cancelled (simulates a slow/hung system).
|
||||
/// </summary>
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Http;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
|
||||
namespace ScadaLink.ExternalSystemGateway.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// ExternalSystemGateway-013: configuration options must actually influence the
|
||||
/// registered HTTP client — an operator setting them must not be silently ignored.
|
||||
/// </summary>
|
||||
public class ServiceWiringTests
|
||||
{
|
||||
[Fact]
|
||||
public void MaxConcurrentConnectionsPerSystem_IsAppliedToTheNamedHttpClientPrimaryHandler()
|
||||
{
|
||||
var config = new ConfigurationBuilder()
|
||||
.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaLink:ExternalSystemGateway:MaxConcurrentConnectionsPerSystem"] = "4",
|
||||
})
|
||||
.Build();
|
||||
|
||||
var services = new ServiceCollection();
|
||||
services.AddLogging();
|
||||
services.AddSingleton<IConfiguration>(config);
|
||||
services.AddSingleton(Substitute.For<IExternalSystemRepository>());
|
||||
services.AddExternalSystemGateway();
|
||||
|
||||
using var provider = services.BuildServiceProvider();
|
||||
|
||||
// Resolve the per-system named client's message-handler chain and walk to the
|
||||
// primary handler — the option must be reflected in MaxConnectionsPerServer.
|
||||
var handlerFactory = provider.GetRequiredService<IHttpMessageHandlerFactory>();
|
||||
var handler = handlerFactory.CreateHandler("ExternalSystem_AnySystem");
|
||||
|
||||
var primary = FindPrimaryHandler(handler);
|
||||
var sockets = Assert.IsType<SocketsHttpHandler>(primary);
|
||||
Assert.Equal(4, sockets.MaxConnectionsPerServer);
|
||||
}
|
||||
|
||||
private static HttpMessageHandler FindPrimaryHandler(HttpMessageHandler handler)
|
||||
{
|
||||
var current = handler;
|
||||
while (current is DelegatingHandler delegating && delegating.InnerHandler != null)
|
||||
{
|
||||
current = delegating.InnerHandler;
|
||||
}
|
||||
return current;
|
||||
}
|
||||
}
|
||||
@@ -110,6 +110,29 @@ public class CentralHealthReportLoopTests
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// HealthMonitoring-006 regression: the central loop's sequence-number seed
|
||||
/// must be derived from the injected <see cref="TimeProvider"/> (Unix-ms),
|
||||
/// not from <c>DateTimeOffset.UtcNow</c> read at field initialization, so the
|
||||
/// seeding strategy is deterministically testable.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void SequenceNumberSeed_UsesInjectedTimeProvider()
|
||||
{
|
||||
var fixedInstant = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
|
||||
var timeProvider = new TestTimeProvider(fixedInstant);
|
||||
|
||||
var loop = new CentralHealthReportLoop(
|
||||
new SiteHealthCollector(),
|
||||
new RecordingAggregator(),
|
||||
new FakeClusterNodeProvider { SelfIsPrimary = true },
|
||||
Options.Create(new HealthMonitoringOptions()),
|
||||
NullLogger<CentralHealthReportLoop>.Instance,
|
||||
timeProvider);
|
||||
|
||||
Assert.Equal(fixedInstant.ToUnixTimeMilliseconds(), loop.CurrentSequenceNumber);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task SetsActiveNodeFlag_EvenWhenNotPrimary()
|
||||
{
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
@@ -20,6 +21,44 @@ public class HealthReportSenderTests
|
||||
public string SiteId { get; set; } = "test-site";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Captures emitted log entries so tests can assert that non-fatal failures
|
||||
/// are surfaced (HealthMonitoring-010) rather than silently swallowed.
|
||||
/// </summary>
|
||||
private sealed class CapturingLogger<T> : ILogger<T>
|
||||
{
|
||||
public sealed record Entry(LogLevel Level, string Message, Exception? Exception);
|
||||
|
||||
public List<Entry> Entries { get; } = [];
|
||||
|
||||
public IDisposable BeginScope<TState>(TState state) where TState : notnull => NullScope.Instance;
|
||||
public bool IsEnabled(LogLevel logLevel) => true;
|
||||
|
||||
public void Log<TState>(
|
||||
LogLevel logLevel, EventId eventId, TState state, Exception? exception,
|
||||
Func<TState, Exception?, string> formatter)
|
||||
{
|
||||
lock (Entries)
|
||||
{
|
||||
Entries.Add(new Entry(logLevel, formatter(state, exception), exception));
|
||||
}
|
||||
}
|
||||
|
||||
private sealed class NullScope : IDisposable
|
||||
{
|
||||
public static readonly NullScope Instance = new();
|
||||
public void Dispose() { }
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>An <see cref="IClusterNodeProvider"/> whose query always throws.</summary>
|
||||
private sealed class ThrowingClusterNodeProvider : IClusterNodeProvider
|
||||
{
|
||||
public bool SelfIsPrimary => true;
|
||||
public IReadOnlyList<NodeStatus> GetClusterNodes() =>
|
||||
throw new InvalidOperationException("cluster query failed");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task SendsReportsWithMonotonicSequenceNumbers()
|
||||
{
|
||||
@@ -226,4 +265,76 @@ public class HealthReportSenderTests
|
||||
|
||||
Assert.InRange(sender.CurrentSequenceNumber, beforeCtor, afterCtor);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// HealthMonitoring-010 regression: a failure refreshing cluster nodes is
|
||||
/// non-fatal (the report still ships) but must no longer be swallowed by a
|
||||
/// bare <c>catch {}</c> — it must be logged as a warning with the exception so
|
||||
/// persistent degradation is diagnosable.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task ClusterNodeRefreshFailure_IsLoggedNotSwallowed()
|
||||
{
|
||||
var transport = new FakeTransport();
|
||||
var collector = new SiteHealthCollector();
|
||||
collector.SetActiveNode(true);
|
||||
var logger = new CapturingLogger<HealthReportSender>();
|
||||
var options = Options.Create(new HealthMonitoringOptions
|
||||
{
|
||||
ReportInterval = TimeSpan.FromMilliseconds(50)
|
||||
});
|
||||
|
||||
var sender = new HealthReportSender(
|
||||
collector,
|
||||
transport,
|
||||
options,
|
||||
logger,
|
||||
new FakeSiteIdentityProvider(),
|
||||
clusterNodeProvider: new ThrowingClusterNodeProvider());
|
||||
|
||||
using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(300));
|
||||
try
|
||||
{
|
||||
await sender.StartAsync(cts.Token);
|
||||
await Task.Delay(250, CancellationToken.None);
|
||||
await sender.StopAsync(CancellationToken.None);
|
||||
}
|
||||
catch (OperationCanceledException) { }
|
||||
|
||||
// The report loop continues despite the failure...
|
||||
Assert.NotEmpty(transport.SentReports);
|
||||
// ...but the failure is surfaced as a warning carrying the exception.
|
||||
CapturingLogger<HealthReportSender>.Entry[] warnings;
|
||||
lock (logger.Entries)
|
||||
{
|
||||
warnings = logger.Entries
|
||||
.Where(e => e.Level == LogLevel.Warning && e.Exception is InvalidOperationException)
|
||||
.ToArray();
|
||||
}
|
||||
Assert.NotEmpty(warnings);
|
||||
Assert.Contains(warnings, w => w.Message.Contains("cluster nodes", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// HealthMonitoring-006 regression: the sequence-number seed must be derived
|
||||
/// from the injected <see cref="TimeProvider"/> so the Unix-ms seeding strategy
|
||||
/// is deterministically testable and the clock dependency is explicit, rather
|
||||
/// than reading <c>DateTimeOffset.UtcNow</c> directly at field initialization.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void SequenceNumberSeed_UsesInjectedTimeProvider()
|
||||
{
|
||||
var fixedInstant = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
|
||||
var timeProvider = new TestTimeProvider(fixedInstant);
|
||||
|
||||
var sender = new HealthReportSender(
|
||||
new SiteHealthCollector(),
|
||||
new FakeTransport(),
|
||||
Options.Create(new HealthMonitoringOptions()),
|
||||
NullLogger<HealthReportSender>.Instance,
|
||||
new FakeSiteIdentityProvider(),
|
||||
timeProvider: timeProvider);
|
||||
|
||||
Assert.Equal(fixedInstant.ToUnixTimeMilliseconds(), sender.CurrentSequenceNumber);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user