From 5fe08eacebdbf07b0c6190a14b0583b811fb7315 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:17:59 -0400 Subject: [PATCH 01/23] docs(plan): audit-log deferred follow-ups implementation plan --- docs/plans/2026-05-21-audit-log-followups.md | 249 ++++++++++++++++++ ...26-05-21-audit-log-followups.md.tasks.json | 17 ++ 2 files changed, 266 insertions(+) create mode 100644 docs/plans/2026-05-21-audit-log-followups.md create mode 100644 docs/plans/2026-05-21-audit-log-followups.md.tasks.json diff --git a/docs/plans/2026-05-21-audit-log-followups.md b/docs/plans/2026-05-21-audit-log-followups.md new file mode 100644 index 0000000..37f57fb --- /dev/null +++ b/docs/plans/2026-05-21-audit-log-followups.md @@ -0,0 +1,249 @@ +# Audit Log #23 — Deferred Follow-ups Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:subagent-driven-development to implement this plan task-by-task (bundled cadence — one implementer + one review pass per task). + +**Goal:** Close the five deferred implementation follow-ups from the Audit Log #23 roadmap so site audit events actually reach central, the audit/SiteCall surfaces are complete, and known tech debt is paid down. + +**Architecture:** Five independent-ish workstreams against the existing ScadaLink codebase. The headline change: site→central audit forwarding moves from the production `NoOpSiteStreamAuditClient` stub to a real **ClusterClient-based push** — the same transport notifications already use (`SiteCommunicationActor` → `ClusterClient.Send("/user/central-communication", …)` → `CentralCommunicationActor`), avoiding a new central-hosted gRPC server. The remaining four follow-ups are scoped tech-debt / UI / contract changes. 
+ +**Tech Stack:** .NET 10, Akka.NET (ClusterClient, ClusterClientReceptionist, cluster singletons, TestKit), EF Core 10 (MS SQL + SQLite providers), Blazor Server + Bootstrap CSS (no third-party UI libs), System.CommandLine, xUnit + Akka.TestKit.Xunit2 + bUnit + NSubstitute, Playwright. + +**Spec sources:** `alog.md`, `docs/requirements/Component-AuditLog.md`, `docs/requirements/Component-SiteCallAudit.md`, `docs/plans/2026-05-20-audit-log-code-roadmap.md` (header lines 14–19 enumerate the deferred items). + +**Ground rules (carry into every task):** +- Branch off `main` before any code change; never commit on `main`. +- Edit in place. Never touch `infra/*`. The `docker/*` cluster config is touched only if a task explicitly says so (none here do). +- Stage with explicit `git add <paths>` — never `git add .`, never `git commit -am`. +- TDD: failing test → minimal code → green → commit. Full solution stays green (`dotnet build ScadaLink.slnx`, `dotnet test ScadaLink.slnx`). +- Additive message-contract evolution where possible; where a contract shape must change (Task 8), update every call site in the same task. +- Do not push to origin — the user authorizes pushes separately. + +--- + +## Task 0: Prep — feature branch + +**Files:** none (git only). + +**Step 1:** From a clean `main`, create the working branch: +```bash +git checkout main && git status --porcelain # expect clean +git checkout -b feature/audit-log-followups +``` + +**Step 2:** Confirm baseline green: +```bash +dotnet build ScadaLink.slnx +``` +Expected: build succeeds. (A full `dotnet test` baseline is optional but recommended.) + +**Acceptance:** on branch `feature/audit-log-followups`, solution builds. 
+ +--- + +## Task 1: Audit push — central ingest routing over ClusterClient + +**What:** Make the receptionist-registered `CentralCommunicationActor` accept `IngestAuditEventsCommand` (and `IngestCachedTelemetryCommand`) from a site ClusterClient, forward to the `AuditLogIngestActor` cluster-singleton proxy, and pipe the reply back. Mirror the existing `NotificationSubmit` / `RegisterNotificationOutbox` pattern exactly. + +**Files:** +- Modify: `src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs` — add `Receive<IngestAuditEventsCommand>` + `Receive<IngestCachedTelemetryCommand>` handlers; add a `RegisterAuditIngest` registration message handler holding the `AuditLogIngestActor` proxy `IActorRef` (mirror `RegisterNotificationOutbox` at line ~120 / `HandleNotificationSubmit` at line ~130). +- Create: `src/ScadaLink.Commons/Messages/Audit/RegisterAuditIngest.cs` — `public sealed record RegisterAuditIngest(IActorRef AuditIngestActor);` (mirror `RegisterNotificationOutbox`). +- Modify: `src/ScadaLink.Host/Actors/AkkaHostedService.cs` — after the central `AuditLogIngestActor` singleton + proxy are created (~lines 355–379), `Tell` the `RegisterAuditIngest` to the `CentralCommunicationActor` (mirror how the Notification Outbox proxy is registered). +- Test: `tests/ScadaLink.Communication.Tests/Actors/CentralCommunicationActorAuditTests.cs` (new). + +**Approach:** +- Handler `Ask`s the registered audit-ingest proxy and `PipeTo`s the `IngestAuditEventsReply` back to the original `Sender` (the ClusterClient round-trips it to the site). Use the existing audit-ingest Ask-timeout convention (30s — see `SiteStreamGrpcServer` `AuditIngestAskTimeout`); add a bound option if no constant is reachable. +- If no audit-ingest proxy is registered yet (startup race), reply with an empty `IngestAuditEventsReply([])` — the site keeps the rows `Pending` and retries, exactly as the gRPC handler does today. 
+- `IngestCachedTelemetryCommand` is routed the same way, but `AuditLogIngestActor` answers it with its own reply type, `IngestCachedTelemetryReply` (not `IngestAuditEventsReply`); the no-proxy fallback therefore replies with an empty `IngestCachedTelemetryReply`. + +**Tests (TestKit + NSubstitute):** +1. `IngestAuditEventsCommand` with an audit-ingest probe registered → probe receives the command, actor replies the probe's `IngestAuditEventsReply` to the sender. +2. `IngestAuditEventsCommand` with no audit-ingest registered → sender gets `IngestAuditEventsReply` with empty `AcceptedEventIds`. +3. `IngestCachedTelemetryCommand` routes to the same proxy. + +**Steps:** write failing tests → run (fail) → implement record + handlers + Host registration → run (pass) → `dotnet build ScadaLink.slnx` → commit. + +**Commit:** `feat(communication): route audit ingest commands through CentralCommunicationActor` + +--- + +## Task 2: Audit push — real site client, Host wiring, integration test + +**What:** Replace `NoOpSiteStreamAuditClient` (production binding) with a real `ISiteStreamAuditClient` that pushes over ClusterClient via the site's `SiteCommunicationActor`. After this task the site `auditlog.db` `Pending` backlog drains to central. + +**Files:** +- Create: `src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs` — implements `ISiteStreamAuditClient`; ctor takes the `SiteCommunicationActor` `IActorRef` + an Ask timeout. +- Modify: `src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs` — ensure `IngestAuditEventsCommand` / `IngestCachedTelemetryCommand` are forwarded over `ClusterClient.Send("/user/central-communication", …)` with the reply routed back to the Ask (mirror the `NotificationSubmit` forward at lines ~190/214/224). +- Modify: `src/ScadaLink.Host/Actors/AkkaHostedService.cs` — in the site telemetry wiring (~lines 648–681), construct `ClusterClientSiteAuditClient` with the `SiteCommunicationActor` ref and pass it to `SiteAuditTelemetryActor` instead of the DI-resolved `NoOpSiteStreamAuditClient`. 
+- Modify: `src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs` (line ~124–129) — keep `NoOpSiteStreamAuditClient` as the DI default (it remains correct for central/test composition roots that have no `SiteCommunicationActor`); update the stale comment that says "M6's reconciliation work brings the real implementation". +- Test: `tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs` (new); extend `tests/ScadaLink.IntegrationTests/AuditLog/` with a ClusterClient-push end-to-end test. + +**Approach:** +- `IngestAuditEventsAsync(AuditEventBatch, ct)` maps the batch to `IngestAuditEventsCommand(IReadOnlyList<AuditEvent>)`, `Ask`s the `SiteCommunicationActor` for `IngestAuditEventsReply`, maps the reply's `AcceptedEventIds` back into the `IngestAck` the `SiteAuditTelemetryActor` expects. +- An Ask timeout / failure must **throw** — `SiteAuditTelemetryActor`'s drain loop already treats a thrown exception as transient (rows stay `Pending`, retried next tick). Keep that contract. +- `IngestCachedTelemetryAsync` does the same with `IngestCachedTelemetryCommand`. (`CachedCallTelemetryForwarder` already resolves `ISiteStreamAuditClient` — no change there.) +- `AuditEvent` already crosses the wire as the `NotificationSubmit` records do; confirm the Akka serializer handles `IReadOnlyList<AuditEvent>` (notification messages prove the pattern). + +**Tests:** +1. `IngestAuditEventsAsync` → batch becomes one `IngestAuditEventsCommand`; mocked actor reply's accepted ids map onto `IngestAck`. +2. Partial ack (3 of 5 ids) → `IngestAck` lists only the 3. +3. Ask timeout → method throws (drain loop keeps rows `Pending`). +4. Integration: boot a site+central pair via the IntegrationTests harness, write an audit event on the site hot-path, assert a central `AuditLog` row appears within ~10s and the site row flips to `Forwarded`. 
+ +**Commit:** `feat(auditlog): real ClusterClient-based site audit push client` + +--- + +## Task 3: Consolidate the duplicated audit DTO mappers + +**What:** Collapse the 4 near-duplicate `AuditEvent`↔`AuditEventDto` mapping copies into one canonical mapper. The project-reference cycle (`AuditLog → Communication`, never the reverse) is resolved by hosting the canonical mapper **in `ScadaLink.Communication`** — it owns the generated `AuditEventDto` and references `Commons` for `AuditEvent`, and `AuditLog` already references `Communication`. + +**Files:** +- Create: `src/ScadaLink.Communication/Grpc/AuditEventDtoMapper.cs` — `public static class` with `ToDto(AuditEvent) → AuditEventDto` and `FromDto(AuditEventDto) → AuditEvent` (lift the canonical logic from `AuditLog/Telemetry/AuditEventMapper.cs`). +- Modify: `src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs` — replace the inlined `IngestAuditEvents` loop (~lines 265–295), `AuditEventToDto` (~490–517) and `MapAuditEventFromDto` (~537–561) with calls to `AuditEventDtoMapper`. +- Delete: `src/ScadaLink.AuditLog/Telemetry/AuditEventMapper.cs`; update its callers in `ScadaLink.AuditLog` to use `Communication`'s `AuditEventDtoMapper`. +- Leave untouched: `SqliteAuditWriter.MapRow` (SQLite `DataReader` → `AuditEvent`, not a DTO mapper — different source type) and `MapSiteCallFromDto` (SiteCall, not audit). Note this in the commit body. +- Test: move/merge `tests/ScadaLink.AuditLog.Tests/Telemetry/AuditEventMapperTests.cs` into `tests/ScadaLink.Communication.Tests/Grpc/AuditEventDtoMapperTests.cs`; keep round-trip coverage (`FromDto(ToDto(x)) == x`). + +**Approach:** Pure refactor — no behaviour change. Verify field-by-field parity against all 3 inlined copies before deleting them (null handling, enum parsing, `Int32Value`/`Timestamp` wrapping). + +**Steps:** create mapper + tests → run → swap call sites → delete old copies → `dotnet build` + `dotnet test ScadaLink.slnx` (all green, no behaviour drift) → commit. 
+ +**Commit:** `refactor(auditlog): consolidate AuditEvent DTO mappers into Communication` + +--- + +## Task 4: Site Call Audit — query / KPI / detail backend + +**What:** Build the missing read-side backend for the Site Calls UI: Commons message contracts, `SiteCallAuditActor` query/KPI/detail handlers, and `CommunicationService` methods. Mirror `NotificationOutboxQueries.cs` + the Notification Outbox actor/service shape. Spec: `Component-SiteCallAudit.md` §KPIs and §queryable list. + +**Files:** +- Create: `src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs` — records mirroring `NotificationOutboxQueries.cs`: + - `SiteCallQueryRequest` (CorrelationId, status/site/kind/target filters, date range, page cursor fields, PageSize) + - `SiteCallSummary` (TrackedOperationId, SourceSite, Kind, TargetSummary, Status, RetryCount, LastError, provenance, CreatedAtUtc, UpdatedAtUtc, TerminalAtUtc) + - `SiteCallQueryResponse` (CorrelationId, Success, ErrorMessage, `IReadOnlyList<SiteCallSummary>`, next-cursor fields) + - `SiteCallKpiRequest` / `SiteCallKpiResponse` (BufferedCount, ParkedCount, FailedLastInterval, DeliveredLastInterval, OldestPendingAge, StuckCount — mirror the Notification Outbox KPI shape; also a per-site variant) + - `SiteCallDetailRequest` / `SiteCallDetailResponse` / `SiteCallDetail` (full row incl. LastError, all timestamps). +- Modify: `src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs` — add `ReceiveAsync` handlers for the query / KPI / detail requests; query handler calls `ISiteCallAuditRepository.QueryAsync` (keyset paging on `(CreatedAtUtc DESC, TrackedOperationId DESC)`); KPI handler computes point-in-time counts from the `SiteCalls` table (stuck = `Pending`/`Retrying` older than the configurable threshold, default 10 min). Use the per-message DI scope pattern already in the actor. 
+- Add repo support if needed: `src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs` may need a KPI-count method + a detail `GetAsync` (a `GetAsync(TrackedOperationId)` already exists). +- Modify: `src/ScadaLink.Communication/CommunicationService.cs` — add `QuerySiteCallsAsync`, `GetSiteCallKpisAsync`, `GetPerSiteSiteCallKpisAsync`, `GetSiteCallDetailAsync` (mirror `QueryNotificationOutboxAsync` etc.: `Ask` the `SiteCallAuditActor` proxy with `_options.QueryTimeout`). +- Test: `tests/ScadaLink.SiteCallAudit.Tests/` (actor handlers), `tests/ScadaLink.Commons.Tests/` (contract shape), `tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs` (extend for KPI counts). + +**Commit:** `feat(sitecallaudit): query, KPI and detail backend for the Site Calls page` + +--- + +## Task 5: Site Call Audit — Retry/Discard relay to owning site + +**What:** Central UI Retry/Discard on a parked Site Call must relay `RetryParkedOperation` / `DiscardParkedOperation` to the **owning site** (sites are the source of truth — central never mutates the `SiteCalls` row directly; the corrected row arrives back via telemetry). Spec: `Component-SiteCallAudit.md` §actions-on-parked-rows. + +**Files:** +- Create: `src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs` — `RetryParkedOperationRequest`/`Response`, `DiscardParkedOperationRequest`/`Response` (carry `TrackedOperationId`, `SourceSite`, `CorrelationId`; response carries Success + a "site unreachable" error case). +- Modify: `src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs` (or a small relay collaborator) — on a relay request, look up the owning site and forward `RetryParkedOperation`/`DiscardParkedOperation` to that site over the central→site ClusterClient (the central side already maintains one ClusterClient per site; reuse the `CentralCommunicationActor` site-addressing path). On no/late reply → respond "site unreachable". 
+- Modify: `src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs` — receive `RetryParkedOperation`/`DiscardParkedOperation` and hand to the site operation-tracking subsystem. +- Modify the site operation-tracking owner (S&F operation-tracking store / `ParkedMessageHandlerActor` in `src/ScadaLink.StoreAndForward/`) — Retry resets a parked tracked operation to `Pending` for the retry loop; Discard marks it `Discarded`. Reuse the parked-message handling that already backs notification Retry/Discard. +- Modify: `src/ScadaLink.Communication/CommunicationService.cs` — add `RetrySiteCallAsync` / `DiscardSiteCallAsync`. +- Test: `tests/ScadaLink.SiteCallAudit.Tests/` (relay routing + unreachable path), `tests/ScadaLink.StoreAndForward.Tests/` (site-side parked op reset/discard), `tests/ScadaLink.Communication.Tests/`. + +**Note for implementer:** this is the meatiest backend task — the central→site relay direction and the site-side parked-operation mutation are both required. If the site operation-tracking Retry/Discard primitive already exists for cached calls, reuse it; only add the message plumbing. + +**Commit:** `feat(sitecallaudit): central→site Retry/Discard relay for parked operations` + +--- + +## Task 6: Site Calls UI page + nav + Audit drill-in + +**What:** Build the Central UI Site Calls page — a near-mirror of `NotificationReport.razor`. Spec: `Component-SiteCallAudit.md`. + +**Files:** +- Create: `src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor` (+ `.razor.cs`) — route `@page "/site-calls/report"`, `RequireDeployment` (or `OperationalAudit`) auth to match the Notifications report gating. Structure (per the form-layout memory: header, filter card, results table, paging, modal): + - Filter card: Status, Kind, Source site, Target keyword, date range, "Stuck only" checkbox, Clear/Query. 
+ - Results table columns: TrackedOperationId, Source site, Kind, Target, Status (badge + Stuck indicator), Retries, Last error, Created, Updated, Actions. + - Actions column: a **"View audit history"** link `href="/audit/log?correlationId=@row.TrackedOperationId"` (the `TrackedOperationId` is the audit `CorrelationId`) — mirror `NotificationReport.razor:172`; plus **Retry/Discard** buttons shown only on `Parked` rows (none on `Failed`). + - Keyset Previous/Next paging; double-click row → detail modal (body shows full row + LastError; reuse the Notifications detail-modal idiom — never `MarkupString`). +- Modify: `src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor` — register the Site Calls page (own "Site Calls" section, or under an existing group, consistent with the `Notifications` / `Audit` section pattern at lines ~65–129). +- Modify: `src/ScadaLink.CentralUI/Components/Pages/Audit/AuditLogPage.razor.cs` — confirm `?correlationId=` drill-in already covers this (it does); no change expected — just verify. +- Test: `tests/ScadaLink.CentralUI.Tests/Pages/` (bUnit — scaffold, paging, parked-only actions, drill-in link), `tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs` (new). + +**Use the `frontend-design` skill** for page/component styling guidance. Blazor Server + Bootstrap only; custom components; clean corporate aesthetic. + +**Commit:** `feat(centralui): Site Calls page with Retry/Discard and Audit drill-in` + +--- + +## Task 7: Site Call KPI tiles + Health dashboard integration + +**What:** Surface Site Call Audit KPIs on the Health dashboard, mirroring the Notification Outbox tiles + `AuditKpiTiles`. + +**Files:** +- Create: `src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor` (+ `.razor.cs`) — mirror `Components/Health/AuditKpiTiles.razor`; tiles for Buffered, Parked (danger border if >0), Stuck (warning border if >0); each tile navigates to `/site-calls/report` with a query-string filter. 
+- Modify: `src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor` (+ code-behind) — add a "Site Calls" KPI section between the Notification Outbox and Audit Log sections; load via `CommunicationService.GetSiteCallKpisAsync` (Task 4). +- Test: `tests/ScadaLink.CentralUI.Tests/` (bUnit — tile rendering, threshold borders, navigation targets). + +**Commit:** `feat(centralui): Site Call KPI tiles on the Health dashboard` + +--- + +## Task 8: Multi-value `AuditLogQueryFilter` — contract + repository + +**What:** Widen `AuditLogQueryFilter` from single-value to multi-value on the `Channel`, `Kind`, `Status`, `SourceSiteId` dimensions, and translate them to `IN (...)` in the repository. `Target`, `Actor`, `CorrelationId`, `FromUtc`, `ToUtc` stay as-is. Keyset paging must not change. + +**Files:** +- Modify: `src/ScadaLink.Commons/Types/Audit/AuditLogQueryFilter.cs` — change `Channel`/`Kind`/`Status`/`SourceSiteId` to `IReadOnlyList<…>?` (e.g. `IReadOnlyList<AuditChannel>? Channels`). Keep the record's other params. This is a **breaking shape change** — update every call site in this task. +- Modify: `src/ScadaLink.ConfigurationDatabase/Repositories/AuditLogRepository.cs` (`QueryAsync`, ~lines 119–165) — each widened dimension becomes `if (filter.Channels is { Count: > 0 }) query = query.Where(e => filter.Channels.Contains(e.Channel));`. Empty/null list = no filter. Keyset predicate + `OrderByDescending` untouched. +- Update all other `AuditLogQueryFilter` constructors in this task so the solution compiles (ManagementService `ParseFilter`, CentralUI `AuditQueryModel.ToFilter`, CLI helpers, tests) — the deep behaviour of those consumers is Task 9; here just make them compile (e.g. wrap a single value in a one-element list). +- Test: `tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/AuditLogRepositoryTests.cs` — add `QueryAsync_FilterByMultipleChannels_ReturnsUnion`, multi-status, multi-site; keep the existing single-value and keyset tests green. 
+ +**Commit:** `feat(auditlog): multi-value AuditLogQueryFilter dimensions` + +--- + +## Task 9: Multi-value filters — ManagementService, CLI, Central UI + +**What:** Make the three consumers actually emit/accept multiple values per dimension instead of collapsing to the first. + +**Files:** +- Modify: `src/ScadaLink.ManagementService/AuditEndpoints.cs` (`ParseFilter`, ~lines 369–414) — read repeated query params with `.ToArray()` (not `.ToString()`); parse each into the enum list; unparseable values silently dropped (keep the existing lax contract). +- Modify: `src/ScadaLink.CentralUI/Components/Audit/AuditQueryModel.cs` (`ToFilter`, ~lines 110–126) — stop collapsing to `.First()`; pass the full `Channels`/`Kinds`/`Statuses`/`SiteIdentifiers` sets. Adjust the `ErrorsOnly` logic (lines ~128–145) for multi-value `Status`. The chip UI already supports multi-select — no `.razor` change expected; verify. +- Modify: `src/ScadaLink.CentralUI/Components/Pages/Audit/AuditLogPage.razor.cs` export-URL builder (~lines 175–227) — emit repeated query-string params per selected value. +- Modify: `src/ScadaLink.CLI/Commands/AuditCommands.cs` (~lines 29–41) — make `--channel`/`--kind`/`--status`/`--site` accept multiple values (System.CommandLine multi-arity options; keep `AcceptOnlyFromAmong` for the enum-like ones). Modify `src/ScadaLink.CLI/Commands/AuditQueryHelpers.cs` — `AuditQueryArgs` fields become arrays; `BuildQueryString` emits one key per value. +- Test: extend `tests/ScadaLink.ManagementService.Tests/AuditEndpointsTests.cs`, `tests/ScadaLink.CLI.Tests/Commands/AuditQueryCommandTests.cs`, `tests/ScadaLink.CentralUI.Tests/` filter-model tests for multi-value round-trips. + +**Commit:** `feat(audit): multi-value filters across ManagementService, CLI and Central UI` + +--- + +## Task 10: Audit results grid — column resize + reorder UX + +**What:** Add drag-to-resize and drag-to-reorder column UX to `AuditResultsGrid`, persisted in `sessionStorage`. 
Blazor + Bootstrap + minimal JS interop only (no third-party libs). + +**Files:** +- Create: `src/ScadaLink.CentralUI/wwwroot/js/audit-grid.js` — a `window.auditGrid` namespace: column-resize drag handlers, header drag-reorder handlers, and `save(key,json)` / `load(key)` over `sessionStorage` (mirror `treeview-storage.js`). +- Modify: `src/ScadaLink.CentralUI/Components/Audit/AuditResultsGrid.razor` (+ `.razor.cs`) — render a resize handle in each `<th>`; make headers draggable; apply persisted widths (inline style/CSS var) and column order (the `ColumnOrder` parameter + `OrderedColumns()` already exist — wire it to persisted state); `IJSRuntime` calls to load on first render and save on change. +- Create: `src/ScadaLink.CentralUI/Components/Audit/AuditResultsGrid.razor.css` — resize-handle styling, drag-over feedback (mirror `AuditDrilldownDrawer.razor.css` / `TreeView.razor.css` idioms). +- Reference the script from the host page (`App.razor` / `_Host` / layout — match where `monaco-init.js` / `session-expiry.js` are referenced). +- Test: extend `tests/ScadaLink.CentralUI.PlaywrightTests/Audit/AuditLogPageTests.cs` (or new `AuditGridColumnTests.cs`) — resize changes a column width, reorder changes header order, both survive a reload via `sessionStorage`. + +**Use the `frontend-design` skill** for the resize-handle / drag-feedback visual treatment. + +**Commit:** `feat(centralui): column resize and reorder for the audit results grid` + +--- + +## Final review + +After Task 10: dispatch a final cross-cutting code review of the whole branch against this plan, then run the full solution build + test once more. Update `docs/plans/2026-05-20-audit-log-code-roadmap.md` header lines 14–19 to strike the five now-completed follow-ups (leaving the three v1.x items). Hand back to the user for the push decision (do not push). + +--- + +## Task dependency summary + +- Task 0 blocks everything. +- Task 2 blocked by Task 1. +- Task 3 independent (after Task 0). 
+- Task 5 blocked by Task 4. +- Task 6 blocked by Tasks 4 and 5. +- Task 7 blocked by Task 4. +- Task 9 blocked by Task 8. +- Task 10 independent (after Task 0). + +Execution order: 0 → 1 → 2 → 3 → 4 → 5 → 6 → 7 → 8 → 9 → 10 → final review. diff --git a/docs/plans/2026-05-21-audit-log-followups.md.tasks.json b/docs/plans/2026-05-21-audit-log-followups.md.tasks.json new file mode 100644 index 0000000..3f476d5 --- /dev/null +++ b/docs/plans/2026-05-21-audit-log-followups.md.tasks.json @@ -0,0 +1,17 @@ +{ + "planPath": "docs/plans/2026-05-21-audit-log-followups.md", + "tasks": [ + {"id": 33, "subject": "Task 0: Prep — feature branch", "status": "pending"}, + {"id": 34, "subject": "Task 1: Audit push — central ingest routing over ClusterClient", "status": "pending", "blockedBy": [33]}, + {"id": 35, "subject": "Task 2: Audit push — real site client, Host wiring, integration test", "status": "pending", "blockedBy": [34]}, + {"id": 36, "subject": "Task 3: Consolidate the duplicated audit DTO mappers", "status": "pending", "blockedBy": [33]}, + {"id": 37, "subject": "Task 4: Site Call Audit — query / KPI / detail backend", "status": "pending", "blockedBy": [33]}, + {"id": 38, "subject": "Task 5: Site Call Audit — Retry/Discard relay to owning site", "status": "pending", "blockedBy": [37]}, + {"id": 39, "subject": "Task 6: Site Calls UI page + nav + Audit drill-in", "status": "pending", "blockedBy": [37, 38]}, + {"id": 40, "subject": "Task 7: Site Call KPI tiles + Health dashboard integration", "status": "pending", "blockedBy": [37]}, + {"id": 41, "subject": "Task 8: Multi-value AuditLogQueryFilter — contract + repository", "status": "pending", "blockedBy": [33]}, + {"id": 42, "subject": "Task 9: Multi-value filters — ManagementService, CLI, Central UI", "status": "pending", "blockedBy": [41]}, + {"id": 43, "subject": "Task 10: Audit results grid — column resize + reorder UX", "status": "pending", "blockedBy": [33]} + ], + "lastUpdated": "2026-05-21T07:30:00Z" +} From 
6d073046c67f2032ed91cc7523247ce1a60d8ee8 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:23:30 -0400 Subject: [PATCH 02/23] feat(communication): route audit ingest commands through CentralCommunicationActor --- .../Actors/CentralCommunicationActor.cs | 88 ++++++++++++ .../Actors/AkkaHostedService.cs | 5 + .../CentralCommunicationActorAuditTests.cs | 128 ++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs diff --git a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs index 505c516..055653b 100644 --- a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs +++ b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs @@ -5,6 +5,7 @@ using Akka.Cluster.Tools.PublishSubscribe; using Akka.Event; using Microsoft.Extensions.DependencyInjection; using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Messages.Communication; using ScadaLink.Commons.Messages.Health; using ScadaLink.Commons.Messages.Notification; @@ -76,6 +77,25 @@ public class CentralCommunicationActor : ReceiveActor /// private IActorRef? _notificationOutboxProxy; + /// + /// Proxy for the central AuditLogIngestActor cluster + /// singleton. Set via — the Host creates the + /// singleton proxy after this actor and registers it (mirrors + /// ). Null until registration completes; + /// an audit ingest command arriving before then is answered with an empty + /// reply so the site keeps its rows Pending and retries. + /// + private IActorRef? _auditIngestProxy; + + /// + /// Ask timeout for routing audit ingest commands to the AuditLogIngestActor + /// proxy — 30 s, matching SiteStreamGrpcServer.AuditIngestAskTimeout + /// (that constant is private to the gRPC server and not reachable here, so it + /// is declared locally). 
A generous window absorbs a slow MS SQL connection + /// without the round-trip surfacing as a failure on a healthy site. + /// + private static readonly TimeSpan AuditIngestAskTimeout = TimeSpan.FromSeconds(30); + /// /// DistributedPubSub topic used to fan health reports out to the peer /// central node so both per-node aggregators stay in sync. See @@ -133,6 +153,24 @@ public class CentralCommunicationActor : ReceiveActor // so the NotificationStatusResponse routes back to the querying site. Receive(HandleNotificationStatusQuery); + // Audit Log (#23): the Host registers the AuditLogIngestActor singleton + // proxy after this actor is created (the proxy cannot exist before this + // actor's construction). + Receive(msg => + { + _auditIngestProxy = msg.AuditIngestActor; + _log.Info("Registered audit ingest proxy"); + }); + + // Audit Log (#23) site→central ingest: a site forwards a batch of audit + // events to the central cluster via ClusterClient. Ask the ingest proxy + // and pipe the IngestAuditEventsReply back to the original Sender (the + // site's ClusterClient path) so the site can flip its rows to Forwarded. + Receive(HandleIngestAuditEvents); + + // Audit Log (#23 M3) combined-telemetry ingest: routes to the same proxy + // the same way; the proxy replies with an IngestCachedTelemetryReply. + Receive(HandleIngestCachedTelemetry); } private void HandleNotificationSubmit(NotificationSubmit msg) @@ -172,6 +210,45 @@ public class CentralCommunicationActor : ReceiveActor _notificationOutboxProxy.Forward(msg); } + private void HandleIngestAuditEvents(IngestAuditEventsCommand msg) + { + if (_auditIngestProxy == null) + { + // No ingest proxy registered yet (host startup race). Reply with an + // empty IngestAuditEventsReply so the site keeps its rows Pending and + // retries — the same behaviour as the gRPC handler's wiring-race path. 
+ _log.Warning( + "Cannot route IngestAuditEventsCommand ({0} events) — audit ingest not available", + msg.Events.Count); + Sender.Tell(new IngestAuditEventsReply(Array.Empty())); + return; + } + + // Capture Sender before the async/PipeTo — Akka resets Sender between + // dispatches. The reply is piped straight back to the site's ClusterClient. + var replyTo = Sender; + _log.Debug("Routing IngestAuditEventsCommand ({0} events) to the audit ingest actor", msg.Events.Count); + _auditIngestProxy.Ask(msg, AuditIngestAskTimeout) + .PipeTo(replyTo); + } + + private void HandleIngestCachedTelemetry(IngestCachedTelemetryCommand msg) + { + if (_auditIngestProxy == null) + { + _log.Warning( + "Cannot route IngestCachedTelemetryCommand ({0} entries) — audit ingest not available", + msg.Entries.Count); + Sender.Tell(new IngestCachedTelemetryReply(Array.Empty())); + return; + } + + var replyTo = Sender; + _log.Debug("Routing IngestCachedTelemetryCommand ({0} entries) to the audit ingest actor", msg.Entries.Count); + _auditIngestProxy.Ask(msg, AuditIngestAskTimeout) + .PipeTo(replyTo); + } + private void HandleHeartbeat(HeartbeatMessage heartbeat) { var aggregator = _serviceProvider.GetService(); @@ -464,3 +541,14 @@ public record DebugStreamTerminated(string SiteId, string CorrelationId); /// after the outbox singleton proxy is created. /// public record RegisterNotificationOutbox(IActorRef OutboxProxy); + +/// +/// Registers the central AuditLogIngestActor singleton proxy with the +/// so site-forwarded +/// and +/// messages can be routed to it. Sent by the Host after the audit-ingest +/// singleton proxy is created. Lives here (not in Commons) because +/// ScadaLink.Commons has no Akka package reference and cannot hold an +/// field. 
+/// +public sealed record RegisterAuditIngest(IActorRef AuditIngestActor); diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index dce065a..e06720e 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -370,6 +370,11 @@ akka {{ .WithSingletonName("audit-log-ingest")); var auditIngestProxy = _actorSystem.ActorOf(auditIngestProxyProps, "audit-log-ingest-proxy"); + // Hand the audit-ingest proxy to the CentralCommunicationActor so audit + // ingest commands forwarded by sites over ClusterClient are routed to the + // singleton. Mirrors the RegisterNotificationOutbox wiring above. + centralCommActor.Tell(new RegisterAuditIngest(auditIngestProxy)); + // Hand the proxy to the SiteStreamGrpcServer (if registered on this node) // so the IngestAuditEvents RPC routes incoming site batches to the singleton. // The gRPC server is currently only registered on Site nodes; on a central diff --git a/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs new file mode 100644 index 0000000..a3d89f8 --- /dev/null +++ b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs @@ -0,0 +1,128 @@ +using Akka.Actor; +using Akka.TestKit; +using Akka.TestKit.Xunit2; +using Microsoft.Extensions.DependencyInjection; +using NSubstitute; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Types; +using ScadaLink.Commons.Types.Enums; +using ScadaLink.Communication.Actors; + +namespace ScadaLink.Communication.Tests; + +/// +/// Tests for the Audit Log (#23) site→central ClusterClient ingest routing on +/// . 
A site ClusterClient delivers +/// / +/// to the receptionist-registered actor, which forwards to the registered +/// AuditLogIngestActor proxy and routes the reply back to the site. +/// Mirrors the NotificationSubmit / RegisterNotificationOutbox pattern. +/// +public class CentralCommunicationActorAuditTests : TestKit +{ + public CentralCommunicationActorAuditTests() : base(@"akka.loglevel = DEBUG") { } + + private IActorRef CreateActor() + { + var mockRepo = Substitute.For(); + mockRepo.GetAllSitesAsync(Arg.Any()) + .Returns(new List()); + + var services = new ServiceCollection(); + services.AddScoped(_ => mockRepo); + var sp = services.BuildServiceProvider(); + + var mockFactory = Substitute.For(); + return Sys.ActorOf(Props.Create(() => new CentralCommunicationActor(sp, mockFactory))); + } + + private static AuditEvent SampleAuditEvent() => new() + { + EventId = Guid.NewGuid(), + OccurredAtUtc = DateTime.UtcNow, + Channel = AuditChannel.ApiOutbound, + Kind = AuditKind.ApiCall, + Status = AuditStatus.Delivered, + }; + + private static SiteCall SampleSiteCall() => new() + { + TrackedOperationId = TrackedOperationId.New(), + Channel = "OutboundApi", + Target = "ExternalSystemA", + SourceSite = "site1", + Status = "Delivered", + RetryCount = 0, + CreatedAtUtc = DateTime.UtcNow, + UpdatedAtUtc = DateTime.UtcNow, + IngestedAtUtc = DateTime.UtcNow, + }; + + [Fact] + public void IngestAuditEventsCommand_WithRegisteredProxy_ForwardsAndRoutesReplyToSender() + { + var actor = CreateActor(); + var auditProbe = CreateTestProbe(); + actor.Tell(new RegisterAuditIngest(auditProbe.Ref)); + + var evt = SampleAuditEvent(); + var cmd = new IngestAuditEventsCommand(new[] { evt }); + actor.Tell(cmd); + + // The audit-ingest proxy receives the command, with the original site + // sender preserved (Forward semantics). + auditProbe.ExpectMsg(cmd); + + // When the proxy replies, the actor routes it back to the original sender. 
+ var reply = new IngestAuditEventsReply(new[] { evt.EventId }); + auditProbe.Reply(reply); + + var received = ExpectMsg(); + Assert.Equal(new[] { evt.EventId }, received.AcceptedEventIds); + } + + [Fact] + public void IngestAuditEventsCommand_WithNoProxyRegistered_RepliesEmptyAcceptedEventIds() + { + var actor = CreateActor(); + + actor.Tell(new IngestAuditEventsCommand(new[] { SampleAuditEvent() })); + + var reply = ExpectMsg(); + Assert.Empty(reply.AcceptedEventIds); + } + + [Fact] + public void IngestCachedTelemetryCommand_WithRegisteredProxy_ForwardsAndRoutesReplyToSender() + { + var actor = CreateActor(); + var auditProbe = CreateTestProbe(); + actor.Tell(new RegisterAuditIngest(auditProbe.Ref)); + + var entry = new CachedTelemetryEntry(SampleAuditEvent(), SampleSiteCall()); + var cmd = new IngestCachedTelemetryCommand(new[] { entry }); + actor.Tell(cmd); + + auditProbe.ExpectMsg(cmd); + + var reply = new IngestCachedTelemetryReply(new[] { entry.Audit.EventId }); + auditProbe.Reply(reply); + + var received = ExpectMsg(); + Assert.Equal(new[] { entry.Audit.EventId }, received.AcceptedEventIds); + } + + [Fact] + public void IngestCachedTelemetryCommand_WithNoProxyRegistered_RepliesEmptyAcceptedEventIds() + { + var actor = CreateActor(); + + var entry = new CachedTelemetryEntry(SampleAuditEvent(), SampleSiteCall()); + actor.Tell(new IngestCachedTelemetryCommand(new[] { entry })); + + var reply = ExpectMsg(); + Assert.Empty(reply.AcceptedEventIds); + } +} From 8c789135035ae4aa4ab235df5010e5175a1b5a1d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:29:54 -0400 Subject: [PATCH 03/23] fix(communication): correct audit-ingest timeout-path docs and add timeout test --- .../Actors/CentralCommunicationActor.cs | 51 +++++++++++++++---- .../CentralCommunicationActorAuditTests.cs | 27 +++++++++- 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs 
b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs index 055653b..319a267 100644 --- a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs +++ b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs @@ -84,17 +84,35 @@ public class CentralCommunicationActor : ReceiveActor /// ). Null until registration completes; /// an audit ingest command arriving before then is answered with an empty /// reply so the site keeps its rows Pending and retries. + /// + /// Once registered, the handler Asks this proxy and pipes the reply straight + /// back to the caller. On an Ask timeout or a faulted reply, PipeTo forwards a + /// to the caller — the fault propagates rather + /// than being swallowed. This differs from the gRPC handler + /// (SiteStreamGrpcServer), which catches the exception and returns an + /// empty ack; here the faulted Ask is the transient signal the site relies on + /// (see ). /// private IActorRef? _auditIngestProxy; /// - /// Ask timeout for routing audit ingest commands to the AuditLogIngestActor - /// proxy — 30 s, matching SiteStreamGrpcServer.AuditIngestAskTimeout - /// (that constant is private to the gRPC server and not reachable here, so it - /// is declared locally). A generous window absorbs a slow MS SQL connection - /// without the round-trip surfacing as a failure on a healthy site. + /// Default Ask timeout for routing audit ingest commands to the + /// AuditLogIngestActor proxy — 30 s, matching the value of + /// SiteStreamGrpcServer.AuditIngestAskTimeout (that constant is private + /// to the gRPC server and not reachable here, so it is declared locally). A + /// generous window absorbs a slow MS SQL connection without the round-trip + /// surfacing as a failure on a healthy site. When the window is exceeded the + /// Ask faults and that fault is piped back to the caller as a + /// (see ). 
/// - private static readonly TimeSpan AuditIngestAskTimeout = TimeSpan.FromSeconds(30); + private static readonly TimeSpan DefaultAuditIngestAskTimeout = TimeSpan.FromSeconds(30); + + /// + /// Effective Ask timeout for audit ingest routing. Defaults to + /// ; overridable via the constructor + /// so tests can exercise the timeout/fault path without waiting 30 s. + /// + private readonly TimeSpan _auditIngestAskTimeout; /// /// DistributedPubSub topic used to fan health reports out to the peer @@ -103,10 +121,19 @@ public class CentralCommunicationActor : ReceiveActor /// private const string HealthReportTopic = "site-health-replica"; - public CentralCommunicationActor(IServiceProvider serviceProvider, ISiteClientFactory siteClientFactory) + /// + /// Optional override for the audit-ingest Ask timeout; defaults to + /// (30 s). Exists only so tests can + /// exercise the timeout/fault path quickly — production always uses the default. + /// + public CentralCommunicationActor( + IServiceProvider serviceProvider, + ISiteClientFactory siteClientFactory, + TimeSpan? auditIngestAskTimeout = null) { _serviceProvider = serviceProvider; _siteClientFactory = siteClientFactory; + _auditIngestAskTimeout = auditIngestAskTimeout ?? DefaultAuditIngestAskTimeout; // Site address cache loaded from database Receive(HandleSiteAddressCacheLoaded); @@ -226,9 +253,15 @@ public class CentralCommunicationActor : ReceiveActor // Capture Sender before the async/PipeTo — Akka resets Sender between // dispatches. The reply is piped straight back to the site's ClusterClient. + // On an Ask timeout or a faulted reply, PipeTo delivers a Status.Failure to + // replyTo: the fault propagates to the caller rather than being swallowed. + // The site's own Ask through this path then faults, and the site drain loop + // treats that as a transient failure — rows stay Pending and are retried on + // the next tick. 
(The gRPC handler instead returns an empty ack on fault; + // propagating the fault here is the cleaner transient signal.) var replyTo = Sender; _log.Debug("Routing IngestAuditEventsCommand ({0} events) to the audit ingest actor", msg.Events.Count); - _auditIngestProxy.Ask(msg, AuditIngestAskTimeout) + _auditIngestProxy.Ask(msg, _auditIngestAskTimeout) .PipeTo(replyTo); } @@ -245,7 +278,7 @@ public class CentralCommunicationActor : ReceiveActor var replyTo = Sender; _log.Debug("Routing IngestCachedTelemetryCommand ({0} entries) to the audit ingest actor", msg.Entries.Count); - _auditIngestProxy.Ask(msg, AuditIngestAskTimeout) + _auditIngestProxy.Ask(msg, _auditIngestAskTimeout) .PipeTo(replyTo); } diff --git a/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs index a3d89f8..f004ec2 100644 --- a/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs +++ b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorAuditTests.cs @@ -24,7 +24,7 @@ public class CentralCommunicationActorAuditTests : TestKit { public CentralCommunicationActorAuditTests() : base(@"akka.loglevel = DEBUG") { } - private IActorRef CreateActor() + private IActorRef CreateActor(TimeSpan? 
auditIngestAskTimeout = null) { var mockRepo = Substitute.For(); mockRepo.GetAllSitesAsync(Arg.Any()) @@ -35,7 +35,8 @@ public class CentralCommunicationActorAuditTests : TestKit var sp = services.BuildServiceProvider(); var mockFactory = Substitute.For(); - return Sys.ActorOf(Props.Create(() => new CentralCommunicationActor(sp, mockFactory))); + return Sys.ActorOf(Props.Create(() => + new CentralCommunicationActor(sp, mockFactory, auditIngestAskTimeout))); } private static AuditEvent SampleAuditEvent() => new() @@ -94,6 +95,28 @@ public class CentralCommunicationActorAuditTests : TestKit Assert.Empty(reply.AcceptedEventIds); } + [Fact] + public void IngestAuditEventsCommand_WhenProxyNeverReplies_PipesStatusFailureToSender() + { + // A short test-only Ask timeout (constructor seam) keeps the test fast — + // production uses the 30 s default. + var actor = CreateActor(auditIngestAskTimeout: TimeSpan.FromMilliseconds(200)); + var auditProbe = CreateTestProbe(); + actor.Tell(new RegisterAuditIngest(auditProbe.Ref)); + + var cmd = new IngestAuditEventsCommand(new[] { SampleAuditEvent() }); + actor.Tell(cmd); + + // The proxy receives the command but deliberately never replies. + auditProbe.ExpectMsg(cmd); + + // The Ask times out; PipeTo forwards the faulted task as a Status.Failure + // to the original sender. This is the real transient signal the site's + // own Ask faults on — it is NOT swallowed into an empty ack. 
+ var failure = ExpectMsg(); + Assert.IsType(failure.Cause); + } + [Fact] public void IngestCachedTelemetryCommand_WithRegisteredProxy_ForwardsAndRoutesReplyToSender() { From de5280d1c7de5c9abab68eaf874c21415810c00c Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:39:17 -0400 Subject: [PATCH 04/23] feat(auditlog): real ClusterClient-based site audit push client --- .../ServiceCollectionExtensions.cs | 13 +- .../Telemetry/ClusterClientSiteAuditClient.cs | 146 +++++++++++++ .../Actors/SiteCommunicationActor.cs | 49 +++++ .../Actors/AkkaHostedService.cs | 14 +- .../ClusterClientSiteAuditClientTests.cs | 202 ++++++++++++++++++ .../AuditLog/SiteAuditPushFlowTests.cs | 200 +++++++++++++++++ 6 files changed, 617 insertions(+), 7 deletions(-) create mode 100644 src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs create mode 100644 tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs create mode 100644 tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs diff --git a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs index 626859f..e0d9e65 100644 --- a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs @@ -121,11 +121,14 @@ public static class ServiceCollectionExtensions logger: sp.GetRequiredService>(), filter: sp.GetRequiredService())); - // ISiteStreamAuditClient: NoOp default. M6's reconciliation work brings - // the real gRPC-backed implementation (no site→central gRPC channel - // exists today — sites talk to central via Akka ClusterClient only). - // Bundle H's integration test substitutes a stub directly into the - // SiteAuditTelemetryActor's Props.Create call. + // ISiteStreamAuditClient: NoOp default. This binding remains correct for + // central/test composition roots that have no SiteCommunicationActor. 
+ // The real implementation is ClusterClientSiteAuditClient, which pushes + // audit telemetry to central over Akka ClusterClient via the site's + // SiteCommunicationActor — the Host wires it directly into the + // SiteAuditTelemetryActor's Props.Create call for site roles (it cannot + // be a DI singleton because it needs the SiteCommunicationActor IActorRef, + // created during Akka bootstrap, not at DI-composition time). services.AddSingleton(); // M3 Bundle F: site-side dual emitter for cached-call lifecycle diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs new file mode 100644 index 0000000..dcf2fc4 --- /dev/null +++ b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs @@ -0,0 +1,146 @@ +using Akka.Actor; +using ScadaLink.AuditLog.Telemetry; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Types; +using ScadaLink.Communication.Grpc; + +namespace ScadaLink.AuditLog.Site.Telemetry; + +/// +/// Production binding for site composition +/// roots: pushes audit telemetry to central over Akka ClusterClient via +/// the site's SiteCommunicationActor. The actor forwards the command to +/// /user/central-communication and the central +/// CentralCommunicationActor Asks the AuditLogIngestActor proxy — +/// the same command/control transport notifications already use. Wired by the +/// Host for site roles; central and test composition roots keep the +/// DI default (they have no +/// SiteCommunicationActor). +/// +/// +/// +/// Throw-on-failure contract. An Ask timeout or a faulted reply +/// () propagates as a thrown exception out of the +/// Ingest*Async methods — it is NOT caught and turned into an empty ack. +/// The drain loop treats a thrown +/// exception as transient and leaves the rows Pending for the next tick. 
+/// Swallowing the fault into an empty ack would be indistinguishable from "zero +/// rows accepted" and would silently lose the retry signal. Task 1 confirmed +/// the central receiving end does not collapse an ingest fault into an empty +/// ack either, so a site-side Ask through the whole path faults cleanly on a +/// central-side timeout. +/// +/// +/// The batches arrive as proto DTOs ( / +/// ) because the +/// builds them with +/// . This client converts them back into +/// the / entities the Akka +/// command messages carry — the same DTO→entity translation the +/// SiteStreamGrpcServer performs for the gRPC reconciliation path. +/// +/// +public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient +{ + private readonly IActorRef _siteCommunicationActor; + private readonly TimeSpan _askTimeout; + + /// + /// The site's SiteCommunicationActor — it forwards the ingest command + /// over the registered central ClusterClient and routes the reply back to + /// this client's Ask. + /// + /// + /// Ask timeout for the round-trip to central. On expiry the Ask throws + /// , which the drain loop treats + /// as transient (rows stay Pending). + /// + public ClusterClientSiteAuditClient(IActorRef siteCommunicationActor, TimeSpan askTimeout) + { + ArgumentNullException.ThrowIfNull(siteCommunicationActor); + _siteCommunicationActor = siteCommunicationActor; + _askTimeout = askTimeout; + } + + /// + public async Task IngestAuditEventsAsync(AuditEventBatch batch, CancellationToken ct) + { + ArgumentNullException.ThrowIfNull(batch); + + var events = new List(batch.Events.Count); + foreach (var dto in batch.Events) + { + events.Add(AuditEventMapper.FromDto(dto)); + } + + // Ask throws AskTimeoutException on timeout and rethrows a + // Status.Failure's inner cause — both surface as a thrown exception so + // the drain loop keeps the rows Pending. We deliberately do NOT catch. 
+ var reply = await _siteCommunicationActor + .Ask(new IngestAuditEventsCommand(events), _askTimeout, ct) + .ConfigureAwait(false); + + return ToAck(reply.AcceptedEventIds); + } + + /// + public async Task IngestCachedTelemetryAsync(CachedTelemetryBatch batch, CancellationToken ct) + { + ArgumentNullException.ThrowIfNull(batch); + + var entries = new List(batch.Packets.Count); + foreach (var packet in batch.Packets) + { + var audit = AuditEventMapper.FromDto(packet.AuditEvent); + var siteCall = MapSiteCall(packet.Operational); + entries.Add(new CachedTelemetryEntry(audit, siteCall)); + } + + // Same throw-on-failure contract as IngestAuditEventsAsync. The reply + // type is IngestCachedTelemetryReply (the central dual-write reply), + // distinct from IngestAuditEventsReply. + var reply = await _siteCommunicationActor + .Ask(new IngestCachedTelemetryCommand(entries), _askTimeout, ct) + .ConfigureAwait(false); + + return ToAck(reply.AcceptedEventIds); + } + + private static IngestAck ToAck(IReadOnlyList acceptedEventIds) + { + var ack = new IngestAck(); + foreach (var id in acceptedEventIds) + { + ack.AcceptedEventIds.Add(id.ToString()); + } + return ack; + } + + /// + /// Translates a into the + /// persistence entity. Mirrors + /// SiteStreamGrpcServer.MapSiteCallFromDto — there is no shared + /// mapper because that lives in ScadaLink.Communication as a private + /// helper. is a placeholder; the + /// central AuditLogIngestActor overwrites it inside the dual-write + /// transaction so the AuditLog and SiteCalls rows share one instant. + /// + private static SiteCall MapSiteCall(SiteCallOperationalDto dto) => new() + { + TrackedOperationId = TrackedOperationId.Parse(dto.TrackedOperationId), + Channel = dto.Channel, + Target = dto.Target, + SourceSite = dto.SourceSite, + Status = dto.Status, + RetryCount = dto.RetryCount, + LastError = string.IsNullOrEmpty(dto.LastError) ? 
null : dto.LastError, + HttpStatus = dto.HttpStatus, + CreatedAtUtc = DateTime.SpecifyKind(dto.CreatedAtUtc.ToDateTime(), DateTimeKind.Utc), + UpdatedAtUtc = DateTime.SpecifyKind(dto.UpdatedAtUtc.ToDateTime(), DateTimeKind.Utc), + TerminalAtUtc = dto.TerminalAtUtc is null + ? null + : DateTime.SpecifyKind(dto.TerminalAtUtc.ToDateTime(), DateTimeKind.Utc), + IngestedAtUtc = DateTime.UtcNow, // overwritten by AuditLogIngestActor + }; +} diff --git a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs index 21094b1..1934ac8 100644 --- a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs +++ b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs @@ -2,6 +2,7 @@ using Akka.Actor; using Akka.Cluster.Tools.Client; using Akka.Event; using ScadaLink.Commons.Messages.Artifacts; +using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Messages.DebugView; using ScadaLink.Commons.Messages.Deployment; using ScadaLink.Commons.Messages.Health; @@ -214,6 +215,54 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers new ClusterClient.Send("/user/central-communication", msg), Sender); }); + // Audit Log (#23): forward a batch of site-local audit events to the + // central cluster. The site SiteAuditTelemetryActor drains its SQLite + // Pending queue through the ClusterClientSiteAuditClient, which Asks + // this actor; the original Sender (that Ask) is passed as the + // ClusterClient.Send sender so the IngestAuditEventsReply routes + // straight back to the waiting Ask, not here. Mirrors NotificationSubmit. + Receive(msg => + { + if (_centralClient == null) + { + // No ClusterClient registered yet (e.g. central contact points + // not configured, or registration not yet completed). Faulting + // the Ask makes the SiteAuditTelemetryActor drain loop treat + // this as transient and keep the rows Pending for the next tick. 
+ _log.Warning( + "Cannot forward IngestAuditEventsCommand ({0} events) — no central ClusterClient registered", + msg.Events.Count); + Sender.Tell(new Status.Failure( + new InvalidOperationException("Central ClusterClient not registered"))); + return; + } + + _log.Debug("Forwarding IngestAuditEventsCommand ({0} events) to central", msg.Events.Count); + _centralClient.Tell( + new ClusterClient.Send("/user/central-communication", msg), Sender); + }); + + // Audit Log (#23) M3: forward a batch of combined cached-call telemetry + // packets to the central cluster. Same forward + reply-routing pattern + // as IngestAuditEventsCommand; central replies with an + // IngestCachedTelemetryReply. + Receive(msg => + { + if (_centralClient == null) + { + _log.Warning( + "Cannot forward IngestCachedTelemetryCommand ({0} entries) — no central ClusterClient registered", + msg.Entries.Count); + Sender.Tell(new Status.Failure( + new InvalidOperationException("Central ClusterClient not registered"))); + return; + } + + _log.Debug("Forwarding IngestCachedTelemetryCommand ({0} entries) to central", msg.Entries.Count); + _centralClient.Tell( + new ClusterClient.Send("/user/central-communication", msg), Sender); + }); + // Internal: send heartbeat tick Receive(_ => SendHeartbeatToCentral()); diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index e06720e..5508ad0 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -668,8 +668,18 @@ akka {{ .GetRequiredService>(); var siteAuditQueue = _serviceProvider .GetRequiredService(); - var siteAuditClient = _serviceProvider - .GetRequiredService(); + // Audit Log (#23) Task 2 follow-up: the production site→central audit + // push uses the ClusterClient transport via the SiteCommunicationActor, + // not the DI-resolved NoOpSiteStreamAuditClient. 
The NoOp default stays + // correct for central/test composition roots (no SiteCommunicationActor); + // a site role wires the real ClusterClient-based client here so the + // SQLite Pending backlog actually drains to central. The forward Ask + // reuses NotificationForwardTimeout — the same site→central command + // forward bound notifications already use over this transport. + var siteAuditClient = (ScadaLink.AuditLog.Site.Telemetry.ISiteStreamAuditClient) + new ScadaLink.AuditLog.Site.Telemetry.ClusterClientSiteAuditClient( + siteCommActor, + _communicationOptions.NotificationForwardTimeout); var siteAuditLogger = _serviceProvider.GetRequiredService() .CreateLogger(); diff --git a/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs b/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs new file mode 100644 index 0000000..d9cbe82 --- /dev/null +++ b/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs @@ -0,0 +1,202 @@ +using Akka.Actor; +using Akka.TestKit.Xunit2; +using Google.Protobuf.WellKnownTypes; +using ScadaLink.AuditLog.Site.Telemetry; +using ScadaLink.AuditLog.Telemetry; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Types.Enums; +using ScadaLink.Communication.Grpc; + +namespace ScadaLink.AuditLog.Tests.Site.Telemetry; + +/// +/// Tests for — the production +/// binding wired by the Host for site +/// roles. The client maps the proto-DTO batches produced by +/// into the Akka +/// / +/// messages, Asks the site's SiteCommunicationActor (which forwards over +/// ClusterClient to central), and maps the reply back into an +/// . +/// +/// +/// A stands in for the SiteCommunicationActor: +/// it lets the tests assert the exact command shape AND drive the reply (or +/// withhold one to exercise the Ask-timeout path). 
+/// +public class ClusterClientSiteAuditClientTests : TestKit +{ + /// Short Ask timeout so the timeout test completes quickly. + private static readonly TimeSpan AskTimeout = TimeSpan.FromMilliseconds(500); + + private static AuditEvent NewEvent(Guid? id = null) => new() + { + EventId = id ?? Guid.NewGuid(), + OccurredAtUtc = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc), + Channel = AuditChannel.ApiOutbound, + Kind = AuditKind.ApiCall, + Status = AuditStatus.Delivered, + SourceSiteId = "site-1", + ForwardState = AuditForwardState.Pending, + }; + + private static AuditEventBatch BatchOf(IEnumerable events) + { + var batch = new AuditEventBatch(); + foreach (var e in events) + { + batch.Events.Add(AuditEventMapper.ToDto(e)); + } + return batch; + } + + private static SiteCallOperationalDto NewOperationalDto() => new() + { + TrackedOperationId = Guid.NewGuid().ToString(), + Channel = "ApiOutbound", + Target = "ext-system-1", + SourceSite = "site-1", + Status = "Submitted", + RetryCount = 0, + LastError = string.Empty, + CreatedAtUtc = Timestamp.FromDateTime(new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc)), + UpdatedAtUtc = Timestamp.FromDateTime(new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc)), + }; + + [Fact] + public async Task IngestAuditEventsAsync_FullAck_MapsAllAcceptedIdsOntoAck() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var events = Enumerable.Range(0, 3).Select(_ => NewEvent()).ToList(); + var batch = BatchOf(events); + + var task = sut.IngestAuditEventsAsync(batch, CancellationToken.None); + + // The probe receives exactly one IngestAuditEventsCommand carrying the + // batch's events; it replies with every EventId accepted. 
+ var cmd = probe.ExpectMsg(TimeSpan.FromSeconds(3)); + Assert.Equal(3, cmd.Events.Count); + Assert.Equal( + events.Select(e => e.EventId).ToHashSet(), + cmd.Events.Select(e => e.EventId).ToHashSet()); + probe.Reply(new IngestAuditEventsReply(events.Select(e => e.EventId).ToList())); + + var ack = await task; + + Assert.Equal( + events.Select(e => e.EventId.ToString()).ToHashSet(), + ack.AcceptedEventIds.ToHashSet()); + } + + [Fact] + public async Task IngestAuditEventsAsync_PartialAck_OnlyAcceptedIdsAppearOnAck() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var events = Enumerable.Range(0, 5).Select(_ => NewEvent()).ToList(); + var accepted = events.Take(3).Select(e => e.EventId).ToList(); + + var task = sut.IngestAuditEventsAsync(BatchOf(events), CancellationToken.None); + + probe.ExpectMsg(TimeSpan.FromSeconds(3)); + probe.Reply(new IngestAuditEventsReply(accepted)); + + var ack = await task; + + Assert.Equal(3, ack.AcceptedEventIds.Count); + Assert.Equal( + accepted.Select(id => id.ToString()).ToHashSet(), + ack.AcceptedEventIds.ToHashSet()); + } + + [Fact] + public async Task IngestAuditEventsAsync_AskTimeout_Throws_SoDrainLoopKeepsRowsPending() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var batch = BatchOf(new[] { NewEvent() }); + + // The probe receives the command but never replies — the Ask times out. + // The contract: a timeout MUST surface as a thrown exception so the + // SiteAuditTelemetryActor drain loop leaves the rows Pending. 
+ var task = sut.IngestAuditEventsAsync(batch, CancellationToken.None); + probe.ExpectMsg(TimeSpan.FromSeconds(3)); + + await Assert.ThrowsAnyAsync(() => task); + } + + [Fact] + public async Task IngestAuditEventsAsync_FaultedReply_Throws() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var task = sut.IngestAuditEventsAsync(BatchOf(new[] { NewEvent() }), CancellationToken.None); + probe.ExpectMsg(TimeSpan.FromSeconds(3)); + + // A Status.Failure from central (Task 1: central does not swallow an + // ingest fault into an empty ack) must propagate as a thrown exception. + probe.Reply(new Status.Failure(new InvalidOperationException("central ingest faulted"))); + + await Assert.ThrowsAnyAsync(() => task); + } + + [Fact] + public async Task IngestCachedTelemetryAsync_RoutesCommand_AndMapsReply() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var events = Enumerable.Range(0, 2).Select(_ => NewEvent()).ToList(); + var batch = new CachedTelemetryBatch(); + foreach (var e in events) + { + batch.Packets.Add(new CachedTelemetryPacket + { + AuditEvent = AuditEventMapper.ToDto(e), + Operational = NewOperationalDto(), + }); + } + + var task = sut.IngestCachedTelemetryAsync(batch, CancellationToken.None); + + // The probe receives an IngestCachedTelemetryCommand (NOT an + // IngestAuditEventsCommand) with one entry per packet. 
+ var cmd = probe.ExpectMsg(TimeSpan.FromSeconds(3)); + Assert.Equal(2, cmd.Entries.Count); + Assert.Equal( + events.Select(e => e.EventId).ToHashSet(), + cmd.Entries.Select(en => en.Audit.EventId).ToHashSet()); + probe.Reply(new IngestCachedTelemetryReply(events.Select(e => e.EventId).ToList())); + + var ack = await task; + + Assert.Equal( + events.Select(e => e.EventId.ToString()).ToHashSet(), + ack.AcceptedEventIds.ToHashSet()); + } + + [Fact] + public async Task IngestCachedTelemetryAsync_AskTimeout_Throws() + { + var probe = CreateTestProbe(); + var sut = new ClusterClientSiteAuditClient(probe.Ref, AskTimeout); + + var batch = new CachedTelemetryBatch(); + batch.Packets.Add(new CachedTelemetryPacket + { + AuditEvent = AuditEventMapper.ToDto(NewEvent()), + Operational = NewOperationalDto(), + }); + + var task = sut.IngestCachedTelemetryAsync(batch, CancellationToken.None); + probe.ExpectMsg(TimeSpan.FromSeconds(3)); + + await Assert.ThrowsAnyAsync(() => task); + } +} diff --git a/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs b/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs new file mode 100644 index 0000000..73c18a6 --- /dev/null +++ b/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs @@ -0,0 +1,200 @@ +using System.Collections.Concurrent; +using System.Collections.Immutable; +using Akka.Actor; +using Akka.Cluster.Tools.Client; +using Akka.TestKit.Xunit2; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using NSubstitute; +using ScadaLink.AuditLog.Site; +using ScadaLink.AuditLog.Site.Telemetry; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Entities.Sites; +using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Types; +using ScadaLink.Commons.Types.Audit; +using ScadaLink.Commons.Types.Enums; +using ScadaLink.Communication; +using ScadaLink.Communication.Actors; + +namespace 
ScadaLink.IntegrationTests.AuditLog; + +/// +/// End-to-end integration test for the Audit Log (#23) site→central push path +/// introduced by the "real ClusterClient-based site audit push client" follow-up. +/// +/// +/// +/// Exercises the full production chain in one actor system: the real +/// site SQLite hot-path, the real +/// drain loop, the real +/// , the real +/// forward, the real +/// routing, and the real +/// AuditLogIngestActor ingest — only the cross-cluster ClusterClient +/// transport itself is substituted by an in-process +/// that unwraps exactly as a real ClusterClient +/// would (a multi-node cluster is out of scope for an in-process test). +/// +/// +/// The central audit store is an in-memory — +/// the production AuditLogRepository emits SQL Server-specific T-SQL and +/// needs an MSSQL container, which this test deliberately avoids. The test +/// asserts both ends of the contract: a central AuditLog row appears AND +/// the site SQLite row flips from to +/// . +/// +/// +public class SiteAuditPushFlowTests : TestKit +{ + /// + /// In-process stand-in for a real Akka ClusterClient: unwraps a + /// and forwards the inner message to the + /// central actor, preserving the original sender so the reply routes back to + /// the site's Ask. A real ClusterClient does exactly this across the cluster + /// boundary; the in-process relay keeps the test free of a multi-node setup. + /// + private sealed class ClusterClientRelay : ReceiveActor + { + public ClusterClientRelay(IActorRef central) + { + Receive(send => central.Forward(send.Message)); + } + } + + /// + /// Thread-safe in-memory . Only + /// is exercised by the ingest path; the + /// rest throw because they are not reachable from this test. 
+ /// + private sealed class InMemoryAuditLogRepository : IAuditLogRepository + { + private readonly ConcurrentDictionary _rows = new(); + + public IReadOnlyCollection Rows => _rows.Values.ToList(); + + public Task InsertIfNotExistsAsync(AuditEvent evt, CancellationToken ct = default) + { + ArgumentNullException.ThrowIfNull(evt); + // First-write-wins idempotency, mirroring the production repository. + _rows.TryAdd(evt.EventId, evt); + return Task.CompletedTask; + } + + public Task> QueryAsync( + AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) + => throw new NotSupportedException(); + + public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) + => throw new NotSupportedException(); + + public Task> GetPartitionBoundariesOlderThanAsync( + DateTime threshold, CancellationToken ct = default) + => throw new NotSupportedException(); + + public Task GetKpiSnapshotAsync( + TimeSpan window, DateTime? nowUtc = null, CancellationToken ct = default) + => throw new NotSupportedException(); + } + + private static AuditEvent NewPendingEvent(Guid id) => new() + { + EventId = id, + OccurredAtUtc = new DateTime(2026, 5, 21, 9, 0, 0, DateTimeKind.Utc), + Channel = AuditChannel.ApiOutbound, + Kind = AuditKind.ApiCall, + Status = AuditStatus.Delivered, + SourceSiteId = "site-1", + Target = "ext-system-1", + PayloadTruncated = false, + ForwardState = AuditForwardState.Pending, + }; + + [Fact] + public async Task SiteAuditEvent_DrainsToCentral_AndFlipsSiteRowToForwarded() + { + // ── Central side ────────────────────────────────────────────────── + // Real AuditLogIngestActor over an in-memory repository (test-mode ctor). + var centralRepo = new InMemoryAuditLogRepository(); + var ingestActor = Sys.ActorOf(Props.Create(() => + new ScadaLink.AuditLog.Central.AuditLogIngestActor( + centralRepo, + NullLogger.Instance))); + + // Real CentralCommunicationActor. 
Its periodic site-address refresh + // resolves an ISiteRepository from this provider; an empty result keeps + // the refresh a clean no-op and never touches the audit-ingest path. + var siteRepo = Substitute.For(); + siteRepo.GetAllSitesAsync().Returns(Array.Empty()); + var centralServices = new ServiceCollection(); + centralServices.AddScoped(_ => siteRepo); + var centralProvider = centralServices.BuildServiceProvider(); + + var centralCommActor = Sys.ActorOf(Props.Create(() => new CentralCommunicationActor( + centralProvider, + new DefaultSiteClientFactory(), + TimeSpan.FromSeconds(5)))); + centralCommActor.Tell(new RegisterAuditIngest(ingestActor)); + + // ── Site side ───────────────────────────────────────────────────── + // Real SqliteAuditWriter on a file-backed SQLite db (the site hot-path + // + Pending queue). A temp file so it survives across DI scopes. + var dbPath = Path.Combine(Path.GetTempPath(), $"auditpush-{Guid.NewGuid():N}.db"); + var writerOptions = Options.Create(new SqliteAuditWriterOptions { DatabasePath = dbPath }); + await using var writer = new SqliteAuditWriter( + writerOptions, NullLogger.Instance); + + // Real SiteCommunicationActor. RegisterCentralClient is given the relay + // standing in for the central ClusterClient. + var siteCommActor = Sys.ActorOf(Props.Create(() => new SiteCommunicationActor( + "site-1", + new CommunicationOptions(), + CreateTestProbe().Ref))); // deployment-manager proxy is unused here + var relay = Sys.ActorOf(Props.Create(() => new ClusterClientRelay(centralCommActor))); + siteCommActor.Tell(new RegisterCentralClient(relay)); + + // The production site audit push client — the unit under integration. + var auditClient = new ClusterClientSiteAuditClient( + siteCommActor, TimeSpan.FromSeconds(5)); + + // Real SiteAuditTelemetryActor drains the writer's Pending queue and + // pushes via the client. Fast intervals so the test completes quickly. 
+ var telemetryOptions = Options.Create(new SiteAuditTelemetryOptions + { + BatchSize = 256, + BusyIntervalSeconds = 1, + IdleIntervalSeconds = 1, + }); + Sys.ActorOf(Props.Create(() => new SiteAuditTelemetryActor( + writer, + auditClient, + telemetryOptions, + NullLogger.Instance))); + + // ── Act ─────────────────────────────────────────────────────────── + // Write an audit event onto the site SQLite hot-path. It lands Pending. + var eventId = Guid.NewGuid(); + await writer.WriteAsync(NewPendingEvent(eventId)); + + // ── Assert ──────────────────────────────────────────────────────── + // Within ~10s the drain loop pushes the event to central AND flips the + // site row to Forwarded. + await AwaitAssertAsync(async () => + { + // Central received and persisted the row. + Assert.Contains(centralRepo.Rows, r => r.EventId == eventId); + + // The site row is no longer Pending. + var stillPending = await writer.ReadPendingAsync(256, CancellationToken.None); + Assert.DoesNotContain(stillPending, r => r.EventId == eventId); + }, TimeSpan.FromSeconds(10), TimeSpan.FromMilliseconds(250)); + + // The central-persisted row carries the central-stamped IngestedAtUtc. + var ingested = centralRepo.Rows.Single(r => r.EventId == eventId); + Assert.NotNull(ingested.IngestedAtUtc); + + // Cleanup the temp SQLite file. 
+ try { File.Delete(dbPath); } catch { /* best-effort */ } + } +} From 6f59a1b54610c3038f5d10d6d54d2b5aa9919e8a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:46:40 -0400 Subject: [PATCH 05/23] fix(auditlog): assert Forwarded state in push integration test; tidy docs and Host wiring --- .../Site/SqliteAuditWriter.cs | 48 +++++++++++++++++++ .../Site/Telemetry/ISiteStreamAuditClient.cs | 40 ++++++++-------- .../Telemetry/NoOpSiteStreamAuditClient.cs | 28 +++++------ .../Actors/AkkaHostedService.cs | 2 +- .../AuditLog/SiteAuditPushFlowTests.cs | 9 ++-- 5 files changed, 88 insertions(+), 39 deletions(-) diff --git a/src/ScadaLink.AuditLog/Site/SqliteAuditWriter.cs b/src/ScadaLink.AuditLog/Site/SqliteAuditWriter.cs index bf5cb8b..3bce65c 100644 --- a/src/ScadaLink.AuditLog/Site/SqliteAuditWriter.cs +++ b/src/ScadaLink.AuditLog/Site/SqliteAuditWriter.cs @@ -351,6 +351,54 @@ public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable } } + /// + /// Returns up to rows in + /// , oldest + /// first, with + /// as the deterministic tiebreaker. The + /// -specific counterpart of + /// ; used by tests to assert a row reached the + /// state specifically (unlike + /// , which also returns + /// rows). + /// + public Task> ReadForwardedAsync(int limit, CancellationToken ct = default) + { + if (limit <= 0) + { + throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0."); + } + + // Mirror ReadPendingAsync: the write lock guards the single connection. 
+ lock (_writeLock) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + using var cmd = _connection.CreateCommand(); + cmd.CommandText = """ + SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId, + SourceSiteId, SourceInstanceId, SourceScript, Actor, Target, + Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail, + RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState + FROM AuditLog + WHERE ForwardState = $forwarded + ORDER BY OccurredAtUtc ASC, EventId ASC + LIMIT $limit; + """; + cmd.Parameters.AddWithValue("$forwarded", AuditForwardState.Forwarded.ToString()); + cmd.Parameters.AddWithValue("$limit", limit); + + var rows = new List(Math.Min(limit, 256)); + using var reader = cmd.ExecuteReader(); + while (reader.Read()) + { + rows.Add(MapRow(reader)); + } + + return Task.FromResult>(rows); + } + } + /// /// Flips the supplied EventIds from to /// in a single UPDATE. Non-existent diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/ISiteStreamAuditClient.cs b/src/ScadaLink.AuditLog/Site/Telemetry/ISiteStreamAuditClient.cs index 6314bba..b6b27f5 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/ISiteStreamAuditClient.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/ISiteStreamAuditClient.cs @@ -3,40 +3,40 @@ using ScadaLink.Communication.Grpc; namespace ScadaLink.AuditLog.Site.Telemetry; /// -/// Mockable abstraction over the central site-stream gRPC client surface that -/// uses to push -/// payloads. The production implementation (added in Bundle E host wiring) -/// wraps the auto-generated SiteStreamService.SiteStreamServiceClient; -/// unit tests substitute via NSubstitute against this interface so the actor -/// never needs a live gRPC channel. +/// Mockable abstraction over the central site-audit push surface that +/// uses to forward +/// payloads. 
The production implementation is +/// — a ClusterClient-based client, +/// wired in the Host for site roles, that forwards batches to central via the +/// site's SiteCommunicationActor. Unit tests substitute via NSubstitute +/// against this interface so the actor never needs a live transport. /// public interface ISiteStreamAuditClient { /// - /// Pushes to the central IngestAuditEvents - /// RPC. The returned carries the - /// accepted_event_ids the actor will flip to + /// Forwards to the central audit-ingest path. The + /// returned carries the accepted_event_ids + /// the actor will flip to /// /// in the site SQLite queue. /// Task IngestAuditEventsAsync(AuditEventBatch batch, CancellationToken ct); /// - /// Pushes the combined (Audit Log #23 / M3) - /// to the central IngestCachedTelemetry RPC. Each packet carries both - /// the audit row and the operational SiteCalls upsert; central writes - /// both in a single MS SQL transaction. Returns the same - /// shape as so - /// the M3 site-side forwarder can flip the underlying audit rows to + /// Forwards the combined (Audit Log #23) + /// to the central cached-telemetry ingest path. Each packet carries both the + /// audit row and the operational SiteCalls upsert; central writes both + /// in a single MS SQL transaction. Returns the same + /// shape as so the site-side forwarder + /// can flip the underlying audit rows to /// /// once central has acknowledged them. /// /// - /// The production gRPC-backed implementation lands in M6 (no site→central - /// gRPC channel exists today); until then the default - /// binding returns an empty ack and - /// integration tests substitute a direct-actor client that routes the batch - /// straight into the in-process AuditLogIngestActor. + /// The production forwards over + /// the ClusterClient transport; the + /// DI default (used by central and test composition roots) returns an empty + /// ack so no rows are flipped. 
/// Task IngestCachedTelemetryAsync(CachedTelemetryBatch batch, CancellationToken ct); } diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/NoOpSiteStreamAuditClient.cs b/src/ScadaLink.AuditLog/Site/Telemetry/NoOpSiteStreamAuditClient.cs index b83a215..2bf786d 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/NoOpSiteStreamAuditClient.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/NoOpSiteStreamAuditClient.cs @@ -5,20 +5,18 @@ namespace ScadaLink.AuditLog.Site.Telemetry; /// /// Default registered by /// . -/// Ships with M2 site-sync-pipeline wiring; the real gRPC-backed -/// implementation is deferred to M6 reconciliation, where a site→central gRPC -/// channel will be introduced (no such channel exists today — sites talk to -/// central exclusively via Akka ClusterClient, while the gRPC SiteStreamService -/// is hosted on the SITE side for central→site streaming). +/// It is a no-op binding for composition roots that have no +/// SiteCommunicationActor — central and test roots. Site roles override +/// it in the Host with the ClusterClient-based +/// , which actually forwards audit +/// telemetry to central. /// /// /// /// Returns an empty so the /// doesn't flip any rows to -/// Forwarded when this NoOp is in effect — Bundle H's integration test -/// substitutes a stub client that routes directly to the central -/// AuditLogIngestActor in-process. Production wiring (M6) will replace -/// this binding with a real client. +/// Forwarded when this NoOp is in effect — rows stay Pending +/// until a real client (or a test stub) takes over. /// /// /// Audit-write paths are best-effort by contract: a NoOp client keeps the @@ -35,7 +33,8 @@ public sealed class NoOpSiteStreamAuditClient : ISiteStreamAuditClient { ArgumentNullException.ThrowIfNull(batch); // Empty ack — no EventIds will be flipped to Forwarded, so rows stay - // Pending until M6's real client (or a Bundle H test stub) takes over. 
+ // Pending until the real ClusterClientSiteAuditClient (or a test stub) + // takes over. return Task.FromResult(EmptyAck); } @@ -43,11 +42,10 @@ public sealed class NoOpSiteStreamAuditClient : ISiteStreamAuditClient public Task IngestCachedTelemetryAsync(CachedTelemetryBatch batch, CancellationToken ct) { ArgumentNullException.ThrowIfNull(batch); - // Empty ack — same rationale as IngestAuditEventsAsync. The M3 - // CachedCallTelemetryForwarder still writes the audit + tracking rows to - // the site SQLite stores authoritatively; central-side state only - // materialises once M6's real gRPC client (or a Bundle G test stub) is - // wired in. + // Empty ack — same rationale as IngestAuditEventsAsync. The site still + // writes the audit + tracking rows to its SQLite stores authoritatively; + // central-side state only materialises once the real + // ClusterClientSiteAuditClient (or a test stub) is wired in. return Task.FromResult(EmptyAck); } } diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 5508ad0..ac9bb89 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -676,7 +676,7 @@ akka {{ // SQLite Pending backlog actually drains to central. The forward Ask // reuses NotificationForwardTimeout — the same site→central command // forward bound notifications already use over this transport. 
- var siteAuditClient = (ScadaLink.AuditLog.Site.Telemetry.ISiteStreamAuditClient) + ScadaLink.AuditLog.Site.Telemetry.ISiteStreamAuditClient siteAuditClient = new ScadaLink.AuditLog.Site.Telemetry.ClusterClientSiteAuditClient( siteCommActor, _communicationOptions.NotificationForwardTimeout); diff --git a/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs b/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs index 73c18a6..05b2693 100644 --- a/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs +++ b/tests/ScadaLink.IntegrationTests/AuditLog/SiteAuditPushFlowTests.cs @@ -185,9 +185,12 @@ public class SiteAuditPushFlowTests : TestKit // Central received and persisted the row. Assert.Contains(centralRepo.Rows, r => r.EventId == eventId); - // The site row is no longer Pending. - var stillPending = await writer.ReadPendingAsync(256, CancellationToken.None); - Assert.DoesNotContain(stillPending, r => r.EventId == eventId); + // The site row reached AuditForwardState.Forwarded specifically — + // not merely "no longer Pending" (a Reconciled row would also leave + // ReadPendingAsync, so we assert the positive Forwarded state). + var forwarded = await writer.ReadForwardedAsync(256, CancellationToken.None); + var row = Assert.Single(forwarded, r => r.EventId == eventId); + Assert.Equal(AuditForwardState.Forwarded, row.ForwardState); }, TimeSpan.FromSeconds(10), TimeSpan.FromMilliseconds(250)); // The central-persisted row carries the central-stamped IngestedAtUtc. 
From fdd1a4b88696ba6494e4940f733ae097b94c5c63 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 03:51:51 -0400 Subject: [PATCH 06/23] refactor(auditlog): consolidate AuditEvent DTO mappers into Communication --- .../Telemetry/ClusterClientSiteAuditClient.cs | 7 +- .../Site/Telemetry/SiteAuditTelemetryActor.cs | 3 +- .../Grpc/AuditEventDtoMapper.cs} | 20 ++- .../Grpc/SiteStreamGrpcServer.cs | 123 +----------------- .../CombinedTelemetryIdempotencyTests.cs | 3 +- .../CombinedTelemetryDispatcher.cs | 3 +- .../DirectActorSiteStreamAuditClient.cs | 7 +- .../ClusterClientSiteAuditClientTests.cs | 7 +- .../AuditEventDtoMapperTests.cs} | 25 ++-- 9 files changed, 45 insertions(+), 153 deletions(-) rename src/{ScadaLink.AuditLog/Telemetry/AuditEventMapper.cs => ScadaLink.Communication/Grpc/AuditEventDtoMapper.cs} (84%) rename tests/{ScadaLink.AuditLog.Tests/Telemetry/AuditEventMapperTests.cs => ScadaLink.Communication.Tests/AuditEventDtoMapperTests.cs} (92%) diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs index dcf2fc4..2bf5f43 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs @@ -1,5 +1,4 @@ using Akka.Actor; -using ScadaLink.AuditLog.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Types; @@ -35,7 +34,7 @@ namespace ScadaLink.AuditLog.Site.Telemetry; /// The batches arrive as proto DTOs ( / /// ) because the /// builds them with -/// . This client converts them back into +/// . This client converts them back into /// the / entities the Akka /// command messages carry — the same DTO→entity translation the /// SiteStreamGrpcServer performs for the gRPC reconciliation path. 
@@ -71,7 +70,7 @@ public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient var events = new List(batch.Events.Count); foreach (var dto in batch.Events) { - events.Add(AuditEventMapper.FromDto(dto)); + events.Add(AuditEventDtoMapper.FromDto(dto)); } // Ask throws AskTimeoutException on timeout and rethrows a @@ -92,7 +91,7 @@ public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient var entries = new List(batch.Packets.Count); foreach (var packet in batch.Packets) { - var audit = AuditEventMapper.FromDto(packet.AuditEvent); + var audit = AuditEventDtoMapper.FromDto(packet.AuditEvent); var siteCall = MapSiteCall(packet.Operational); entries.Add(new CachedTelemetryEntry(audit, siteCall)); } diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs b/src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs index 724e1d1..e903e0a 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/SiteAuditTelemetryActor.cs @@ -1,7 +1,6 @@ using Akka.Actor; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; -using ScadaLink.AuditLog.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Interfaces.Services; using ScadaLink.Communication.Grpc; @@ -136,7 +135,7 @@ public class SiteAuditTelemetryActor : ReceiveActor var batch = new AuditEventBatch(); foreach (var e in events) { - batch.Events.Add(AuditEventMapper.ToDto(e)); + batch.Events.Add(AuditEventDtoMapper.ToDto(e)); } return batch; } diff --git a/src/ScadaLink.AuditLog/Telemetry/AuditEventMapper.cs b/src/ScadaLink.Communication/Grpc/AuditEventDtoMapper.cs similarity index 84% rename from src/ScadaLink.AuditLog/Telemetry/AuditEventMapper.cs rename to src/ScadaLink.Communication/Grpc/AuditEventDtoMapper.cs index d821db0..ed5fb22 100644 --- a/src/ScadaLink.AuditLog/Telemetry/AuditEventMapper.cs +++ b/src/ScadaLink.Communication/Grpc/AuditEventDtoMapper.cs @@ -1,16 
+1,24 @@ using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Types.Enums; -using ScadaLink.Communication.Grpc; using Timestamp = Google.Protobuf.WellKnownTypes.Timestamp; -namespace ScadaLink.AuditLog.Telemetry; +namespace ScadaLink.Communication.Grpc; /// -/// Bridges Audit Log (#23) rows between the in-process record -/// and the wire-format exchanged over the -/// IngestAuditEvents RPC. +/// Canonical bridge for Audit Log (#23) rows between the in-process +/// record and the wire-format +/// exchanged over the IngestAuditEvents, IngestCachedTelemetry and +/// PullAuditEvents RPCs. /// /// +/// +/// This mapper lives in ScadaLink.Communication (which owns the generated +/// and references Commons for +/// ) so both SiteStreamGrpcServer and +/// ScadaLink.AuditLog can share one implementation without the +/// project-reference cycle that would result from hosting it in +/// ScadaLink.AuditLog (AuditLog → Communication, never the reverse). +/// /// Lossy by design: the proto contract intentionally omits two fields. /// /// — site-local SQLite state, never travels. @@ -22,7 +30,7 @@ namespace ScadaLink.AuditLog.Telemetry; /// Int32Value wrapper so they preserve true null semantics. /// /// -public static class AuditEventMapper +public static class AuditEventDtoMapper { /// /// Projects an into its wire-format DTO. 
Null reference diff --git a/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs b/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs index 8a92027..23a19d8 100644 --- a/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs +++ b/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs @@ -8,7 +8,6 @@ using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Interfaces.Services; using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Types; -using ScadaLink.Commons.Types.Enums; using GrpcStatus = Grpc.Core.Status; namespace ScadaLink.Communication.Grpc; @@ -224,13 +223,10 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase /// /// /// - /// The DTO→entity conversion is inlined here (rather than calling the - /// AuditLog mapper) to avoid a project-reference cycle: - /// ScadaLink.AuditLog already references - /// ScadaLink.Communication, so the gRPC server cannot reach back - /// into AuditLog for its mapper. The shape mirrors - /// AuditEventMapper.FromDto in ScadaLink.AuditLog.Telemetry; - /// the two must evolve together. + /// The DTO→entity conversion uses the shared + /// (hosted in ScadaLink.Communication so both this server and + /// ScadaLink.AuditLog share one implementation without a + /// project-reference cycle). /// /// /// When is not yet wired (host startup @@ -262,36 +258,10 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase return new IngestAck(); } - // Inlined FromDto. Keep in sync with AuditEventMapper.FromDto in - // ScadaLink.AuditLog.Telemetry — there is no shared mapper because - // doing so would create a project-reference cycle (AuditLog → Communication). 
var entities = new List(request.Events.Count); foreach (var dto in request.Events) { - entities.Add(new AuditEvent - { - EventId = Guid.Parse(dto.EventId), - OccurredAtUtc = DateTime.SpecifyKind(dto.OccurredAtUtc.ToDateTime(), DateTimeKind.Utc), - IngestedAtUtc = null, - Channel = Enum.Parse(dto.Channel), - Kind = Enum.Parse(dto.Kind), - CorrelationId = string.IsNullOrEmpty(dto.CorrelationId) ? null : Guid.Parse(dto.CorrelationId), - SourceSiteId = NullIfEmpty(dto.SourceSiteId), - SourceInstanceId = NullIfEmpty(dto.SourceInstanceId), - SourceScript = NullIfEmpty(dto.SourceScript), - Actor = NullIfEmpty(dto.Actor), - Target = NullIfEmpty(dto.Target), - Status = Enum.Parse(dto.Status), - HttpStatus = dto.HttpStatus, - DurationMs = dto.DurationMs, - ErrorMessage = NullIfEmpty(dto.ErrorMessage), - ErrorDetail = NullIfEmpty(dto.ErrorDetail), - RequestSummary = NullIfEmpty(dto.RequestSummary), - ResponseSummary = NullIfEmpty(dto.ResponseSummary), - PayloadTruncated = dto.PayloadTruncated, - Extra = NullIfEmpty(dto.Extra), - ForwardState = null, - }); + entities.Add(AuditEventDtoMapper.FromDto(dto)); } var cmd = new IngestAuditEventsCommand(entities); @@ -355,7 +325,7 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase var entries = new List(request.Packets.Count); foreach (var packet in request.Packets) { - var auditEvent = MapAuditEventFromDto(packet.AuditEvent); + var auditEvent = AuditEventDtoMapper.FromDto(packet.AuditEvent); var siteCall = MapSiteCallFromDto(packet.Operational); entries.Add(new CachedTelemetryEntry(auditEvent, siteCall)); } @@ -450,7 +420,7 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase }; foreach (var evt in events) { - response.Events.Add(AuditEventToDto(evt)); + response.Events.Add(AuditEventDtoMapper.ToDto(evt)); } // Flip to Reconciled AFTER projecting the response so a fault below the @@ -481,85 +451,6 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase 
return response; } - /// - /// Inlined audit-event entity→DTO translation. Keep in sync with - /// AuditEventMapper.ToDto in ScadaLink.AuditLog.Telemetry — - /// the project-reference cycle (AuditLog → Communication) prevents calling - /// the AuditLog mapper directly. The shape mirrors the FromDto pair above. - /// - private static AuditEventDto AuditEventToDto(AuditEvent evt) - { - var dto = new AuditEventDto - { - EventId = evt.EventId.ToString(), - OccurredAtUtc = Google.Protobuf.WellKnownTypes.Timestamp.FromDateTime(EnsureUtc(evt.OccurredAtUtc)), - Channel = evt.Channel.ToString(), - Kind = evt.Kind.ToString(), - CorrelationId = evt.CorrelationId?.ToString() ?? string.Empty, - SourceSiteId = evt.SourceSiteId ?? string.Empty, - SourceInstanceId = evt.SourceInstanceId ?? string.Empty, - SourceScript = evt.SourceScript ?? string.Empty, - Actor = evt.Actor ?? string.Empty, - Target = evt.Target ?? string.Empty, - Status = evt.Status.ToString(), - ErrorMessage = evt.ErrorMessage ?? string.Empty, - ErrorDetail = evt.ErrorDetail ?? string.Empty, - RequestSummary = evt.RequestSummary ?? string.Empty, - ResponseSummary = evt.ResponseSummary ?? string.Empty, - PayloadTruncated = evt.PayloadTruncated, - Extra = evt.Extra ?? string.Empty, - }; - - if (evt.HttpStatus.HasValue) dto.HttpStatus = evt.HttpStatus.Value; - if (evt.DurationMs.HasValue) dto.DurationMs = evt.DurationMs.Value; - - return dto; - } - - private static DateTime EnsureUtc(DateTime value) => - value.Kind == DateTimeKind.Utc - ? value - : DateTime.SpecifyKind(value.ToUniversalTime(), DateTimeKind.Utc); - - private static string? NullIfEmpty(string? value) => - string.IsNullOrEmpty(value) ? null : value; - - /// - /// Inlined audit-event DTO→entity translation, kept in sync with the - /// handler above. Extracted to a private - /// helper so the M3 dual-write RPC can reuse it without duplicating yet - /// another copy. 
The shape still mirrors - /// AuditEventMapper.FromDto in ScadaLink.AuditLog.Telemetry; - /// the two must evolve together (the project-reference cycle that - /// prevents calling the AuditLog mapper directly is documented on - /// ). - /// - private static AuditEvent MapAuditEventFromDto(AuditEventDto dto) => - new() - { - EventId = Guid.Parse(dto.EventId), - OccurredAtUtc = DateTime.SpecifyKind(dto.OccurredAtUtc.ToDateTime(), DateTimeKind.Utc), - IngestedAtUtc = null, - Channel = Enum.Parse(dto.Channel), - Kind = Enum.Parse(dto.Kind), - CorrelationId = NullIfEmpty(dto.CorrelationId) is { } cid ? Guid.Parse(cid) : null, - SourceSiteId = NullIfEmpty(dto.SourceSiteId), - SourceInstanceId = NullIfEmpty(dto.SourceInstanceId), - SourceScript = NullIfEmpty(dto.SourceScript), - Actor = NullIfEmpty(dto.Actor), - Target = NullIfEmpty(dto.Target), - Status = Enum.Parse(dto.Status), - HttpStatus = dto.HttpStatus, - DurationMs = dto.DurationMs, - ErrorMessage = NullIfEmpty(dto.ErrorMessage), - ErrorDetail = NullIfEmpty(dto.ErrorDetail), - RequestSummary = NullIfEmpty(dto.RequestSummary), - ResponseSummary = NullIfEmpty(dto.ResponseSummary), - PayloadTruncated = dto.PayloadTruncated, - Extra = NullIfEmpty(dto.Extra), - ForwardState = null, - }; - /// /// Translates a into the persistence /// entity. 
is stamped here as a diff --git a/tests/ScadaLink.AuditLog.Tests/Integration/CombinedTelemetryIdempotencyTests.cs b/tests/ScadaLink.AuditLog.Tests/Integration/CombinedTelemetryIdempotencyTests.cs index 1d67ed7..63d8574 100644 --- a/tests/ScadaLink.AuditLog.Tests/Integration/CombinedTelemetryIdempotencyTests.cs +++ b/tests/ScadaLink.AuditLog.Tests/Integration/CombinedTelemetryIdempotencyTests.cs @@ -1,7 +1,6 @@ using Akka.TestKit.Xunit2; using Microsoft.EntityFrameworkCore; using ScadaLink.AuditLog.Tests.Integration.Infrastructure; -using ScadaLink.AuditLog.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Messages.Integration; using ScadaLink.Commons.Types; @@ -55,7 +54,7 @@ public class CombinedTelemetryIdempotencyTests : TestKit, IClassFixture(batch.Events.Count); foreach (var dto in batch.Events) { - events.Add(AuditEventMapper.FromDto(dto)); + events.Add(AuditEventDtoMapper.FromDto(dto)); } // Ask the central actor; the reply carries the accepted EventIds. @@ -114,7 +113,7 @@ public sealed class DirectActorSiteStreamAuditClient : ISiteStreamAuditClient /// back into the proto ack. /// /// - /// Uses the shared for the audit half; + /// Uses the shared for the audit half; /// the SiteCall DTO is decoded inline because the AuditLog mapper does not /// (and should not) know about — the /// production gRPC server (Bundle D) uses the same inline shape. 
@@ -132,7 +131,7 @@ public sealed class DirectActorSiteStreamAuditClient : ISiteStreamAuditClient var entries = new List(batch.Packets.Count); foreach (var packet in batch.Packets) { - var audit = AuditEventMapper.FromDto(packet.AuditEvent); + var audit = AuditEventDtoMapper.FromDto(packet.AuditEvent); var siteCall = MapSiteCallFromDto(packet.Operational); entries.Add(new CachedTelemetryEntry(audit, siteCall)); } diff --git a/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs b/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs index d9cbe82..b1005d3 100644 --- a/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs +++ b/tests/ScadaLink.AuditLog.Tests/Site/Telemetry/ClusterClientSiteAuditClientTests.cs @@ -2,7 +2,6 @@ using Akka.Actor; using Akka.TestKit.Xunit2; using Google.Protobuf.WellKnownTypes; using ScadaLink.AuditLog.Site.Telemetry; -using ScadaLink.AuditLog.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Types.Enums; @@ -46,7 +45,7 @@ public class ClusterClientSiteAuditClientTests : TestKit var batch = new AuditEventBatch(); foreach (var e in events) { - batch.Events.Add(AuditEventMapper.ToDto(e)); + batch.Events.Add(AuditEventDtoMapper.ToDto(e)); } return batch; } @@ -158,7 +157,7 @@ public class ClusterClientSiteAuditClientTests : TestKit { batch.Packets.Add(new CachedTelemetryPacket { - AuditEvent = AuditEventMapper.ToDto(e), + AuditEvent = AuditEventDtoMapper.ToDto(e), Operational = NewOperationalDto(), }); } @@ -190,7 +189,7 @@ public class ClusterClientSiteAuditClientTests : TestKit var batch = new CachedTelemetryBatch(); batch.Packets.Add(new CachedTelemetryPacket { - AuditEvent = AuditEventMapper.ToDto(NewEvent()), + AuditEvent = AuditEventDtoMapper.ToDto(NewEvent()), Operational = NewOperationalDto(), }); diff --git a/tests/ScadaLink.AuditLog.Tests/Telemetry/AuditEventMapperTests.cs 
b/tests/ScadaLink.Communication.Tests/AuditEventDtoMapperTests.cs similarity index 92% rename from tests/ScadaLink.AuditLog.Tests/Telemetry/AuditEventMapperTests.cs rename to tests/ScadaLink.Communication.Tests/AuditEventDtoMapperTests.cs index 6901361..a247ec6 100644 --- a/tests/ScadaLink.AuditLog.Tests/Telemetry/AuditEventMapperTests.cs +++ b/tests/ScadaLink.Communication.Tests/AuditEventDtoMapperTests.cs @@ -1,18 +1,17 @@ using Google.Protobuf.WellKnownTypes; -using ScadaLink.AuditLog.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Types.Enums; using ScadaLink.Communication.Grpc; -namespace ScadaLink.AuditLog.Tests.Telemetry; +namespace ScadaLink.Communication.Tests; /// -/// Round-trip + edge tests for the that bridges +/// Round-trip + edge tests for the that bridges /// (Commons) ↔ (proto). /// ForwardState is site-local and IngestedAtUtc is central-set, so neither survives /// the proto round-trip. /// -public class AuditEventMapperTests +public class AuditEventDtoMapperTests { [Fact] public void ToDto_FromDto_Roundtrip_FullyPopulated_PreservesAllFields() @@ -47,8 +46,8 @@ public class AuditEventMapperTests ForwardState = AuditForwardState.Pending }; - var dto = AuditEventMapper.ToDto(original); - var roundTripped = AuditEventMapper.FromDto(dto); + var dto = AuditEventDtoMapper.ToDto(original); + var roundTripped = AuditEventDtoMapper.FromDto(dto); Assert.Equal(original.EventId, roundTripped.EventId); Assert.Equal(original.OccurredAtUtc, roundTripped.OccurredAtUtc); @@ -88,7 +87,7 @@ public class AuditEventMapperTests // all string? 
fields left null; CorrelationId null }; - var dto = AuditEventMapper.ToDto(evt); + var dto = AuditEventDtoMapper.ToDto(evt); Assert.Equal(string.Empty, dto.CorrelationId); Assert.Equal(string.Empty, dto.SourceSiteId); @@ -126,7 +125,7 @@ public class AuditEventMapperTests Extra = string.Empty }; - var evt = AuditEventMapper.FromDto(dto); + var evt = AuditEventDtoMapper.FromDto(dto); Assert.Null(evt.CorrelationId); Assert.Null(evt.SourceSiteId); @@ -154,8 +153,8 @@ public class AuditEventMapperTests Status = AuditStatus.Delivered }; - var dto = AuditEventMapper.ToDto(evt); - var roundTripped = AuditEventMapper.FromDto(dto); + var dto = AuditEventDtoMapper.ToDto(evt); + var roundTripped = AuditEventDtoMapper.FromDto(dto); Assert.Equal(DateTimeKind.Utc, roundTripped.OccurredAtUtc.Kind); Assert.Equal(occurredAt, roundTripped.OccurredAtUtc); @@ -175,7 +174,7 @@ public class AuditEventMapperTests DurationMs = null }; - var dto = AuditEventMapper.ToDto(evt); + var dto = AuditEventDtoMapper.ToDto(evt); Assert.Null(dto.HttpStatus); Assert.Null(dto.DurationMs); @@ -197,7 +196,7 @@ public class AuditEventMapperTests Assert.Null(dto.HttpStatus); Assert.Null(dto.DurationMs); - var evt = AuditEventMapper.FromDto(dto); + var evt = AuditEventDtoMapper.FromDto(dto); Assert.Null(evt.HttpStatus); Assert.Null(evt.DurationMs); @@ -215,7 +214,7 @@ public class AuditEventMapperTests Status = AuditStatus.Parked }; - var dto = AuditEventMapper.ToDto(evt); + var dto = AuditEventDtoMapper.ToDto(evt); Assert.Equal("ApiOutbound", dto.Channel); Assert.Equal("ApiCallCached", dto.Kind); From 6f0d2ca49933a91f15f3142e46a5822b3d5c776b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:00:20 -0400 Subject: [PATCH 07/23] refactor(auditlog): consolidate SiteCall DTO mapper into Communication Extract the verbatim-duplicated SiteCallOperationalDto -> SiteCall mapper into a single public SiteCallDtoMapper static class in ScadaLink.Communication.Grpc, mirroring AuditEventDtoMapper. 
Replaces three identical private copies (SiteStreamGrpcServer.MapSiteCallFromDto, ClusterClientSiteAuditClient.MapSiteCall, and the test-infra DirectActorSiteStreamAuditClient.MapSiteCallFromDto), removes the now-stale doc comment that justified the duplication, and drops the using directives that became unused. Adds SiteCallDtoMapperTests for field-by-field coverage. Only the FromDto direction is provided: nothing maps SiteCall back onto the wire, so a ToDto would be dead code. --- .../Telemetry/ClusterClientSiteAuditClient.cs | 30 +--- .../Grpc/SiteCallDtoMapper.cs | 70 +++++++++ .../Grpc/SiteStreamGrpcServer.cs | 28 +--- .../DirectActorSiteStreamAuditClient.cs | 34 +---- .../SiteCallDtoMapperTests.cs | 135 ++++++++++++++++++ 5 files changed, 211 insertions(+), 86 deletions(-) create mode 100644 src/ScadaLink.Communication/Grpc/SiteCallDtoMapper.cs create mode 100644 tests/ScadaLink.Communication.Tests/SiteCallDtoMapperTests.cs diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs index 2bf5f43..492065e 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/ClusterClientSiteAuditClient.cs @@ -1,7 +1,6 @@ using Akka.Actor; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Messages.Audit; -using ScadaLink.Commons.Types; using ScadaLink.Communication.Grpc; namespace ScadaLink.AuditLog.Site.Telemetry; @@ -92,7 +91,7 @@ public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient foreach (var packet in batch.Packets) { var audit = AuditEventDtoMapper.FromDto(packet.AuditEvent); - var siteCall = MapSiteCall(packet.Operational); + var siteCall = SiteCallDtoMapper.FromDto(packet.Operational); entries.Add(new CachedTelemetryEntry(audit, siteCall)); } @@ -115,31 +114,4 @@ public sealed class ClusterClientSiteAuditClient : ISiteStreamAuditClient } return ack; } - - /// - /// 
Translates a into the - /// persistence entity. Mirrors - /// SiteStreamGrpcServer.MapSiteCallFromDto — there is no shared - /// mapper because that lives in ScadaLink.Communication as a private - /// helper. is a placeholder; the - /// central AuditLogIngestActor overwrites it inside the dual-write - /// transaction so the AuditLog and SiteCalls rows share one instant. - /// - private static SiteCall MapSiteCall(SiteCallOperationalDto dto) => new() - { - TrackedOperationId = TrackedOperationId.Parse(dto.TrackedOperationId), - Channel = dto.Channel, - Target = dto.Target, - SourceSite = dto.SourceSite, - Status = dto.Status, - RetryCount = dto.RetryCount, - LastError = string.IsNullOrEmpty(dto.LastError) ? null : dto.LastError, - HttpStatus = dto.HttpStatus, - CreatedAtUtc = DateTime.SpecifyKind(dto.CreatedAtUtc.ToDateTime(), DateTimeKind.Utc), - UpdatedAtUtc = DateTime.SpecifyKind(dto.UpdatedAtUtc.ToDateTime(), DateTimeKind.Utc), - TerminalAtUtc = dto.TerminalAtUtc is null - ? null - : DateTime.SpecifyKind(dto.TerminalAtUtc.ToDateTime(), DateTimeKind.Utc), - IngestedAtUtc = DateTime.UtcNow, // overwritten by AuditLogIngestActor - }; } diff --git a/src/ScadaLink.Communication/Grpc/SiteCallDtoMapper.cs b/src/ScadaLink.Communication/Grpc/SiteCallDtoMapper.cs new file mode 100644 index 0000000..c61e3e5 --- /dev/null +++ b/src/ScadaLink.Communication/Grpc/SiteCallDtoMapper.cs @@ -0,0 +1,70 @@ +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Types; + +namespace ScadaLink.Communication.Grpc; + +/// +/// Canonical bridge for Site Call Audit (#22) operational rows between the +/// wire-format exchanged on the +/// CachedCallTelemetry packet and the in-process +/// persistence entity central writes into the SiteCalls table. 
+/// +/// +/// +/// This mapper lives in ScadaLink.Communication (which owns the generated +/// and references Commons for +/// ) so both SiteStreamGrpcServer and +/// ScadaLink.AuditLog can share one implementation without the +/// project-reference cycle that would result from hosting it in +/// ScadaLink.AuditLog (AuditLog → Communication, never the reverse). +/// Mirrors the sibling . +/// +/// +/// Only the DTO→entity direction is provided: nothing in the system maps a +/// back onto the wire (sites emit the operational state +/// from SiteCallOperational, never from the central +/// entity), so an entity→DTO method would be dead code. +/// +/// +/// String nullability convention: proto3 scalar strings cannot be absent, so the +/// optional rehydrates from an empty string back +/// to null. The optional HttpStatus and TerminalAtUtc use proto +/// wrappers so they preserve true null semantics. +/// +/// +public static class SiteCallDtoMapper +{ + /// + /// Reconstructs a persistence entity from its + /// wire-format DTO. An empty LastError rehydrates as null; absent + /// HttpStatus/TerminalAtUtc wrappers stay null. + /// + /// + /// is stamped here as a placeholder + /// (); the central ingest actor overwrites it + /// inside the dual-write transaction so the AuditLog and SiteCalls rows + /// share one instant. The value sent on the wire is informational only. + /// + public static SiteCall FromDto(SiteCallOperationalDto dto) + { + ArgumentNullException.ThrowIfNull(dto); + + return new SiteCall + { + TrackedOperationId = TrackedOperationId.Parse(dto.TrackedOperationId), + Channel = dto.Channel, + Target = dto.Target, + SourceSite = dto.SourceSite, + Status = dto.Status, + RetryCount = dto.RetryCount, + LastError = string.IsNullOrEmpty(dto.LastError) ? 
null : dto.LastError, + HttpStatus = dto.HttpStatus, + CreatedAtUtc = DateTime.SpecifyKind(dto.CreatedAtUtc.ToDateTime(), DateTimeKind.Utc), + UpdatedAtUtc = DateTime.SpecifyKind(dto.UpdatedAtUtc.ToDateTime(), DateTimeKind.Utc), + TerminalAtUtc = dto.TerminalAtUtc is null + ? null + : DateTime.SpecifyKind(dto.TerminalAtUtc.ToDateTime(), DateTimeKind.Utc), + IngestedAtUtc = DateTime.UtcNow, // overwritten by AuditLogIngestActor + }; + } +} diff --git a/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs b/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs index 23a19d8..e75db33 100644 --- a/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs +++ b/src/ScadaLink.Communication/Grpc/SiteStreamGrpcServer.cs @@ -7,7 +7,6 @@ using Microsoft.Extensions.Options; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Interfaces.Services; using ScadaLink.Commons.Messages.Audit; -using ScadaLink.Commons.Types; using GrpcStatus = Grpc.Core.Status; namespace ScadaLink.Communication.Grpc; @@ -326,7 +325,7 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase foreach (var packet in request.Packets) { var auditEvent = AuditEventDtoMapper.FromDto(packet.AuditEvent); - var siteCall = MapSiteCallFromDto(packet.Operational); + var siteCall = SiteCallDtoMapper.FromDto(packet.Operational); entries.Add(new CachedTelemetryEntry(auditEvent, siteCall)); } @@ -451,31 +450,6 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase return response; } - /// - /// Translates a into the persistence - /// entity. is stamped here as a - /// placeholder; the central ingest actor overwrites it inside the - /// dual-write transaction so the AuditLog and SiteCalls rows share one - /// instant. 
- /// - private static SiteCall MapSiteCallFromDto(SiteCallOperationalDto dto) => new() - { - TrackedOperationId = TrackedOperationId.Parse(dto.TrackedOperationId), - Channel = dto.Channel, - Target = dto.Target, - SourceSite = dto.SourceSite, - Status = dto.Status, - RetryCount = dto.RetryCount, - LastError = string.IsNullOrEmpty(dto.LastError) ? null : dto.LastError, - HttpStatus = dto.HttpStatus, - CreatedAtUtc = DateTime.SpecifyKind(dto.CreatedAtUtc.ToDateTime(), DateTimeKind.Utc), - UpdatedAtUtc = DateTime.SpecifyKind(dto.UpdatedAtUtc.ToDateTime(), DateTimeKind.Utc), - TerminalAtUtc = dto.TerminalAtUtc is null - ? null - : DateTime.SpecifyKind(dto.TerminalAtUtc.ToDateTime(), DateTimeKind.Utc), - IngestedAtUtc = DateTime.UtcNow, // overwritten by AuditLogIngestActor - }; - /// /// Tracks a single active stream so cleanup only removes its own entry. /// diff --git a/tests/ScadaLink.AuditLog.Tests/Integration/Infrastructure/DirectActorSiteStreamAuditClient.cs b/tests/ScadaLink.AuditLog.Tests/Integration/Infrastructure/DirectActorSiteStreamAuditClient.cs index cb5e455..9fa1482 100644 --- a/tests/ScadaLink.AuditLog.Tests/Integration/Infrastructure/DirectActorSiteStreamAuditClient.cs +++ b/tests/ScadaLink.AuditLog.Tests/Integration/Infrastructure/DirectActorSiteStreamAuditClient.cs @@ -2,7 +2,6 @@ using Akka.Actor; using ScadaLink.AuditLog.Site.Telemetry; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Messages.Audit; -using ScadaLink.Commons.Types; using ScadaLink.Communication.Grpc; namespace ScadaLink.AuditLog.Tests.Integration.Infrastructure; @@ -113,10 +112,9 @@ public sealed class DirectActorSiteStreamAuditClient : ISiteStreamAuditClient /// back into the proto ack. /// /// - /// Uses the shared for the audit half; - /// the SiteCall DTO is decoded inline because the AuditLog mapper does not - /// (and should not) know about — the - /// production gRPC server (Bundle D) uses the same inline shape. 
+ /// Uses the shared for the audit half + /// and for the SiteCall half — the same + /// canonical mappers the production SiteStreamGrpcServer uses. /// public async Task IngestCachedTelemetryAsync(CachedTelemetryBatch batch, CancellationToken ct) { @@ -132,7 +130,7 @@ public sealed class DirectActorSiteStreamAuditClient : ISiteStreamAuditClient foreach (var packet in batch.Packets) { var audit = AuditEventDtoMapper.FromDto(packet.AuditEvent); - var siteCall = MapSiteCallFromDto(packet.Operational); + var siteCall = SiteCallDtoMapper.FromDto(packet.Operational); entries.Add(new CachedTelemetryEntry(audit, siteCall)); } @@ -149,28 +147,4 @@ public sealed class DirectActorSiteStreamAuditClient : ISiteStreamAuditClient } return ack; } - - /// - /// Mirrors SiteStreamGrpcServer.MapSiteCallFromDto — keep the two in - /// sync. The placeholder stamped here - /// is overwritten by the central ingest actor inside the dual-write - /// transaction, so the value sent on the wire is informational only. - /// - private static SiteCall MapSiteCallFromDto(SiteCallOperationalDto dto) => new() - { - TrackedOperationId = TrackedOperationId.Parse(dto.TrackedOperationId), - Channel = dto.Channel, - Target = dto.Target, - SourceSite = dto.SourceSite, - Status = dto.Status, - RetryCount = dto.RetryCount, - LastError = string.IsNullOrEmpty(dto.LastError) ? null : dto.LastError, - HttpStatus = dto.HttpStatus, - CreatedAtUtc = DateTime.SpecifyKind(dto.CreatedAtUtc.ToDateTime(), DateTimeKind.Utc), - UpdatedAtUtc = DateTime.SpecifyKind(dto.UpdatedAtUtc.ToDateTime(), DateTimeKind.Utc), - TerminalAtUtc = dto.TerminalAtUtc is null - ? 
null - : DateTime.SpecifyKind(dto.TerminalAtUtc.ToDateTime(), DateTimeKind.Utc), - IngestedAtUtc = DateTime.UtcNow, - }; } diff --git a/tests/ScadaLink.Communication.Tests/SiteCallDtoMapperTests.cs b/tests/ScadaLink.Communication.Tests/SiteCallDtoMapperTests.cs new file mode 100644 index 0000000..4de1d37 --- /dev/null +++ b/tests/ScadaLink.Communication.Tests/SiteCallDtoMapperTests.cs @@ -0,0 +1,135 @@ +using Google.Protobuf.WellKnownTypes; +using ScadaLink.Communication.Grpc; + +namespace ScadaLink.Communication.Tests; + +/// +/// Field-coverage + edge tests for the that +/// decodes (proto) into the +/// persistence entity. +/// Only the DTO→entity direction exists — nothing in the system maps a +/// SiteCall back onto the wire — so there is no round-trip test. +/// IngestedAtUtc is a site-side placeholder the central ingest actor +/// overwrites, so it is asserted as "recent UTC" rather than a fixed value. +/// +public class SiteCallDtoMapperTests +{ + [Fact] + public void FromDto_FullyPopulated_MapsEveryField() + { + var trackedOperationId = Guid.NewGuid(); + var createdAt = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc); + var updatedAt = new DateTime(2026, 5, 20, 10, 5, 0, DateTimeKind.Utc); + var terminalAt = new DateTime(2026, 5, 20, 10, 10, 0, DateTimeKind.Utc); + + var dto = new SiteCallOperationalDto + { + TrackedOperationId = trackedOperationId.ToString(), + Channel = "ApiOutbound", + Target = "ERP.GetOrder", + SourceSite = "site-melbourne", + Status = "Delivered", + RetryCount = 3, + LastError = "transient 503", + HttpStatus = 200, + CreatedAtUtc = Timestamp.FromDateTime(createdAt), + UpdatedAtUtc = Timestamp.FromDateTime(updatedAt), + TerminalAtUtc = Timestamp.FromDateTime(terminalAt), + }; + + var entity = SiteCallDtoMapper.FromDto(dto); + + Assert.Equal(trackedOperationId, entity.TrackedOperationId.Value); + Assert.Equal("ApiOutbound", entity.Channel); + Assert.Equal("ERP.GetOrder", entity.Target); + Assert.Equal("site-melbourne", 
entity.SourceSite); + Assert.Equal("Delivered", entity.Status); + Assert.Equal(3, entity.RetryCount); + Assert.Equal("transient 503", entity.LastError); + Assert.Equal(200, entity.HttpStatus); + Assert.Equal(createdAt, entity.CreatedAtUtc); + Assert.Equal(updatedAt, entity.UpdatedAtUtc); + Assert.Equal(terminalAt, entity.TerminalAtUtc); + } + + [Fact] + public void FromDto_EmptyLastError_BecomesNull() + { + var dto = NewMinimalDto(); + dto.LastError = string.Empty; + + var entity = SiteCallDtoMapper.FromDto(dto); + + Assert.Null(entity.LastError); + } + + [Fact] + public void FromDto_AbsentHttpStatus_StaysNull() + { + // Int32Value wrapper unset on the wire — preserves true null semantics + // for non-API cached writes. + var dto = NewMinimalDto(); + + Assert.Null(dto.HttpStatus); + + var entity = SiteCallDtoMapper.FromDto(dto); + + Assert.Null(entity.HttpStatus); + } + + [Fact] + public void FromDto_AbsentTerminalAt_StaysNull() + { + // Timestamp wrapper unset while the call is still active. + var dto = NewMinimalDto(); + + Assert.Null(dto.TerminalAtUtc); + + var entity = SiteCallDtoMapper.FromDto(dto); + + Assert.Null(entity.TerminalAtUtc); + } + + [Fact] + public void FromDto_Timestamps_RehydrateAsUtcKind() + { + var dto = NewMinimalDto(); + + var entity = SiteCallDtoMapper.FromDto(dto); + + Assert.Equal(DateTimeKind.Utc, entity.CreatedAtUtc.Kind); + Assert.Equal(DateTimeKind.Utc, entity.UpdatedAtUtc.Kind); + } + + [Fact] + public void FromDto_IngestedAtUtc_StampedAsRecentPlaceholder() + { + // IngestedAtUtc is a site-side DateTime.UtcNow placeholder; the central + // ingest actor overwrites it inside the dual-write transaction. 
+ var before = DateTime.UtcNow; + + var entity = SiteCallDtoMapper.FromDto(NewMinimalDto()); + + var after = DateTime.UtcNow; + Assert.InRange(entity.IngestedAtUtc, before, after); + Assert.Equal(DateTimeKind.Utc, entity.IngestedAtUtc.Kind); + } + + [Fact] + public void FromDto_Null_Throws() + { + Assert.Throws(() => SiteCallDtoMapper.FromDto(null!)); + } + + private static SiteCallOperationalDto NewMinimalDto() => new() + { + TrackedOperationId = Guid.NewGuid().ToString(), + Channel = "DbOutbound", + Target = "warehouse.dbo.WriteOrder", + SourceSite = "site-brisbane", + Status = "Submitted", + RetryCount = 0, + CreatedAtUtc = Timestamp.FromDateTime(DateTime.UtcNow), + UpdatedAtUtc = Timestamp.FromDateTime(DateTime.UtcNow), + }; +} From e3519fdb39c908dda10ec45f0c7a065a4bdf314b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:14:49 -0400 Subject: [PATCH 08/23] feat(sitecallaudit): query, KPI and detail backend for the Site Calls page --- .../Repositories/ISiteCallAuditRepository.cs | 23 ++ .../Messages/Audit/SiteCallQueries.cs | 153 ++++++++ .../Types/Audit/SiteCallKpiSnapshot.cs | 38 ++ .../Types/Audit/SiteCallSiteKpiSnapshot.cs | 34 ++ .../CommunicationService.cs | 52 +++ .../Repositories/SiteCallAuditRepository.cs | 135 +++++++ .../Actors/AkkaHostedService.cs | 10 +- .../ScadaLink.SiteCallAudit.csproj | 2 + .../ServiceCollectionExtensions.cs | 27 +- .../SiteCallAuditActor.cs | 334 +++++++++++++++++- .../SiteCallAuditOptions.cs | 26 ++ ...ditLogIngestActorCombinedTelemetryTests.cs | 12 + .../Messages/SiteCallQueriesTests.cs | 128 +++++++ .../CommunicationServiceTests.cs | 146 ++++++++ .../SiteCallAuditRepositoryTests.cs | 98 +++++ .../SiteCallAuditActorTests.cs | 299 +++++++++++++++- .../SiteCallAuditOptionsTests.cs | 15 + 17 files changed, 1514 insertions(+), 18 deletions(-) create mode 100644 src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs create mode 100644 src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs create mode 
100644 src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs create mode 100644 src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs create mode 100644 tests/ScadaLink.Commons.Tests/Messages/SiteCallQueriesTests.cs create mode 100644 tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditOptionsTests.cs diff --git a/src/ScadaLink.Commons/Interfaces/Repositories/ISiteCallAuditRepository.cs b/src/ScadaLink.Commons/Interfaces/Repositories/ISiteCallAuditRepository.cs index 7bb3790..81c3a48 100644 --- a/src/ScadaLink.Commons/Interfaces/Repositories/ISiteCallAuditRepository.cs +++ b/src/ScadaLink.Commons/Interfaces/Repositories/ISiteCallAuditRepository.cs @@ -63,4 +63,27 @@ public interface ISiteCallAuditRepository /// deleted. /// Task PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default); + + /// + /// Computes a point-in-time global from the + /// SiteCalls table. Counts are aggregated server-side (no row + /// materialisation): StuckCount uses ; + /// FailedLastInterval / DeliveredLastInterval use + /// ; the current time for OldestPendingAge + /// is captured inside the method. + /// + Task ComputeKpisAsync( + DateTime stuckCutoff, + DateTime intervalSince, + CancellationToken ct = default); + + /// + /// Computes a point-in-time per source + /// site. Sites with no SiteCalls rows at all are omitted. The stuck + /// cutoff and interval bounds are interpreted as in . 
+ /// + Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, + DateTime intervalSince, + CancellationToken ct = default); } diff --git a/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs new file mode 100644 index 0000000..d23a467 --- /dev/null +++ b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs @@ -0,0 +1,153 @@ +using ScadaLink.Commons.Types.Audit; + +namespace ScadaLink.Commons.Messages.Audit; + +/// +/// Site Calls UI -> Central: paginated, filtered query over the central +/// SiteCalls table (Site Call Audit #22). All filter fields are optional; +/// restricts results to stuck cached calls. Mirrors +/// +/// but uses keyset paging ( + ) +/// to match the repository's (CreatedAtUtc DESC, TrackedOperationId DESC) +/// cursor, rather than page numbers. +/// +/// +/// matches the SiteCall.Channel column — +/// "ApiOutbound" or "DbOutbound" (the spec's Kind notion; +/// the entity exposes it as Channel). is an +/// exact-match target filter, consistent with the repository's +/// predicate. +/// +public sealed record SiteCallQueryRequest( + string CorrelationId, + string? StatusFilter, + string? SourceSiteFilter, + string? ChannelFilter, + string? TargetKeyword, + bool StuckOnly, + DateTime? FromUtc, + DateTime? ToUtc, + DateTime? AfterCreatedAtUtc, + Guid? AfterId, + int PageSize); + +/// +/// A single SiteCalls row summarised for the Site Calls UI grid. Carries +/// only the columns the +/// entity genuinely exposes — there are no source-instance/script provenance +/// columns on that entity, so unlike +/// +/// none are surfaced here. +/// +public sealed record SiteCallSummary( + Guid TrackedOperationId, + string SourceSite, + string Channel, + string Target, + string Status, + int RetryCount, + string? LastError, + int? HttpStatus, + DateTime CreatedAtUtc, + DateTime UpdatedAtUtc, + DateTime? TerminalAtUtc, + bool IsStuck); + +/// +/// Central -> Site Calls UI: paginated response for a . 
+/// The keyset cursor of the last row is echoed back as +/// + for the caller +/// to request the following page; both are null when the page was empty. +/// On a repository fault is false, +/// carries the cause and is empty. +/// +public sealed record SiteCallQueryResponse( + string CorrelationId, + bool Success, + string? ErrorMessage, + IReadOnlyList SiteCalls, + DateTime? NextAfterCreatedAtUtc, + Guid? NextAfterId); + +/// +/// Site Calls UI -> Central: request for the full detail of a single cached call, +/// for the report detail modal. +/// +public sealed record SiteCallDetailRequest( + string CorrelationId, + Guid TrackedOperationId); + +/// +/// Central -> Site Calls UI: full detail for one cached call. On a repository +/// fault or missing row, is false / +/// is null and carries +/// the cause. +/// +public sealed record SiteCallDetailResponse( + string CorrelationId, + bool Success, + string? ErrorMessage, + SiteCallDetail? Detail); + +/// +/// Full SiteCalls row detail for the report detail modal — every field +/// on the entity, +/// including and the +/// timestamp the grid summary omits. +/// +public sealed record SiteCallDetail( + Guid TrackedOperationId, + string SourceSite, + string Channel, + string Target, + string Status, + int RetryCount, + string? LastError, + int? HttpStatus, + DateTime CreatedAtUtc, + DateTime UpdatedAtUtc, + DateTime? TerminalAtUtc, + DateTime IngestedAtUtc); + +/// +/// Site Calls UI -> Central: request for the global SiteCalls KPI summary. +/// Mirrors . +/// +public sealed record SiteCallKpiRequest( + string CorrelationId); + +/// +/// Central -> Site Calls UI: KPI summary for the Site Calls dashboard. On a +/// repository fault is false, +/// carries the cause, and the KPI fields are +/// zeroed/null. +/// +public sealed record SiteCallKpiResponse( + string CorrelationId, + bool Success, + string? ErrorMessage, + int BufferedCount, + int ParkedCount, + int FailedLastInterval, + int DeliveredLastInterval, + TimeSpan? 
OldestPendingAge, + int StuckCount); + +/// +/// Site Calls UI -> Central: request for the per-source-site SiteCalls +/// KPI breakdown. Mirrors +/// . +/// +public sealed record PerSiteSiteCallKpiRequest( + string CorrelationId); + +/// +/// Central -> Site Calls UI: per-site KPI breakdown for the Site Calls KPIs +/// page. On a repository fault is false, +/// carries the cause, and is empty. +/// +public sealed record PerSiteSiteCallKpiResponse( + string CorrelationId, + bool Success, + string? ErrorMessage, + IReadOnlyList Sites); diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs new file mode 100644 index 0000000..07873fb --- /dev/null +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs @@ -0,0 +1,38 @@ +namespace ScadaLink.Commons.Types.Audit; + +/// +/// Point-in-time operational metrics for the central SiteCalls table +/// (Site Call Audit #22), surfaced on the health dashboard. The cached-call +/// counterpart of ; +/// mirrors its shape so the Central UI Site Calls KPI tiles can reuse the +/// Notification Outbox tile layout. +/// +/// +/// Count of non-terminal rows (Pending + Retrying) — calls +/// buffered at sites awaiting retry. +/// +/// Count of rows in the Parked status. +/// +/// Count of Failed rows whose +/// is at or after the supplied "since" timestamp. +/// +/// +/// Count of Delivered rows whose +/// is at or after the supplied "since" timestamp. +/// +/// +/// Age of the oldest non-terminal row (now - min(CreatedAtUtc)), or +/// null when there are no non-terminal rows. +/// +/// +/// Count of non-terminal rows (Pending/Retrying) whose +/// is older +/// than the supplied stuck cutoff. Display-only — no escalation. +/// +public sealed record SiteCallKpiSnapshot( + int BufferedCount, + int ParkedCount, + int FailedLastInterval, + int DeliveredLastInterval, + TimeSpan? 
OldestPendingAge, + int StuckCount); diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs new file mode 100644 index 0000000..c67c895 --- /dev/null +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs @@ -0,0 +1,34 @@ +namespace ScadaLink.Commons.Types.Audit; + +/// +/// Point-in-time SiteCalls metrics scoped to a single source site. The +/// per-site counterpart of ; surfaced in the +/// per-site breakdown table on the Site Calls KPIs page. Mirrors +/// . +/// +/// The site identifier these metrics are scoped to. +/// Count of this site's non-terminal rows (Pending + Retrying). +/// Count of this site's rows in the Parked status. +/// +/// Count of this site's Failed rows whose TerminalAtUtc is at or +/// after the "since" timestamp. +/// +/// +/// Count of this site's Delivered rows whose TerminalAtUtc is at +/// or after the "since" timestamp. +/// +/// +/// Age of this site's oldest non-terminal row, or null when it has none. +/// +/// +/// Count of this site's non-terminal rows whose CreatedAtUtc is older +/// than the stuck cutoff. +/// +public sealed record SiteCallSiteKpiSnapshot( + string SourceSite, + int BufferedCount, + int ParkedCount, + int FailedLastInterval, + int DeliveredLastInterval, + TimeSpan? 
OldestPendingAge, + int StuckCount); diff --git a/src/ScadaLink.Communication/CommunicationService.cs b/src/ScadaLink.Communication/CommunicationService.cs index a6ea2c7..c83901b 100644 --- a/src/ScadaLink.Communication/CommunicationService.cs +++ b/src/ScadaLink.Communication/CommunicationService.cs @@ -2,6 +2,7 @@ using Akka.Actor; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using ScadaLink.Commons.Messages.Artifacts; +using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Messages.DebugView; using ScadaLink.Commons.Messages.Deployment; using ScadaLink.Commons.Messages.Health; @@ -25,6 +26,7 @@ public class CommunicationService private readonly ILogger _logger; private IActorRef? _centralCommunicationActor; private IActorRef? _notificationOutboxProxy; + private IActorRef? _siteCallAuditProxy; public CommunicationService( IOptions options, @@ -52,6 +54,17 @@ public class CommunicationService _notificationOutboxProxy = notificationOutboxProxy; } + /// + /// Sets the Site Call Audit (#22) singleton proxy reference. Called during + /// actor system startup. The Site Call Audit actor is central-local, so Site + /// Calls read calls Ask this proxy directly (no SiteEnvelope routing), the + /// same pattern as . + /// + public void SetSiteCallAudit(IActorRef siteCallAuditProxy) + { + _siteCallAuditProxy = siteCallAuditProxy; + } + /// /// Triggers an immediate refresh of the site address cache from the database. /// @@ -80,6 +93,15 @@ public class CommunicationService ?? throw new InvalidOperationException("CommunicationService not initialized. NotificationOutbox proxy not set."); } + /// + /// Gets the Site Call Audit proxy reference. Throws if not yet initialized. + /// + private IActorRef GetSiteCallAudit() + { + return _siteCallAuditProxy + ?? throw new InvalidOperationException("CommunicationService not initialized. 
SiteCallAudit proxy not set."); + } + // ── Pattern 1: Instance Deployment ── public async Task DeployInstanceAsync( @@ -295,6 +317,36 @@ public class CommunicationService return await GetNotificationOutbox().Ask( request, _options.QueryTimeout, cancellationToken); } + + // ── Site Call Audit (central-local actor — Asked directly, no SiteEnvelope) ── + + public async Task QuerySiteCallsAsync( + SiteCallQueryRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } + + public async Task GetSiteCallDetailAsync( + SiteCallDetailRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } + + public async Task GetSiteCallKpisAsync( + SiteCallKpiRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } + + public async Task GetPerSiteSiteCallKpisAsync( + PerSiteSiteCallKpiRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } } /// diff --git a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs index 3fdff7e..954e490 100644 --- a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs +++ b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs @@ -201,6 +201,141 @@ ORDER BY CreatedAtUtc DESC, TrackedOperationId DESC;"; ct); } + // Terminal status string literals for the interval-throughput KPIs. The + // Status column is a plain varchar (no value converter), so these compare + // directly in translated SQL. 
+ // + // NOTE on the "buffered/non-terminal" definition: the SiteCalls operational + // mirror stores AuditStatus-derived strings (Attempted/Delivered/Parked/ + // Failed/...), NOT the tracking-lifecycle Pending/Retrying names the spec's + // KPI section uses. There is therefore no Status string that means + // "buffered". The schema-honest predicate for "non-terminal / buffered" is + // TerminalAtUtc IS NULL — consistent with PurgeTerminalAsync's terminal + // predicate and with the SiteCall entity's own contract ("TerminalAtUtc ... + // null while still active"). All buffered / stuck / oldest-pending counts + // below key off TerminalAtUtc, not Status. + private const string StatusParked = "Parked"; + private const string StatusDelivered = "Delivered"; + private const string StatusFailed = "Failed"; + + /// + /// Computes the global KPI snapshot with five server-side aggregate queries + /// against dbo.SiteCalls. No rows are materialised — every count is a + /// translated COUNT and the oldest-pending age is a translated + /// MIN(CreatedAtUtc). The Status and CreatedAtUtc/TerminalAtUtc + /// columns have no value converter, so the aggregates translate cleanly to + /// SQL Server (unlike the NotificationOutbox's DateTimeOffset-converted + /// column, which forces an order-and-take). "Buffered" / "stuck" key off + /// TerminalAtUtc IS NULL — see the field comments above. 
+    ///
+    public async Task<SiteCallKpiSnapshot> ComputeKpisAsync(
+        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
+    {
+        var now = DateTime.UtcNow; // reference instant for the oldest-pending age
+
+        var bufferedCount = await _context.SiteCalls
+            .CountAsync(s => s.TerminalAtUtc == null, ct);
+
+        var parkedCount = await _context.SiteCalls
+            .CountAsync(s => s.Status == StatusParked, ct);
+
+        var failedLastInterval = await _context.SiteCalls
+            .CountAsync(s => s.Status == StatusFailed
+                && s.TerminalAtUtc != null
+                && s.TerminalAtUtc >= intervalSince, ct);
+
+        var deliveredLastInterval = await _context.SiteCalls
+            .CountAsync(s => s.Status == StatusDelivered
+                && s.TerminalAtUtc != null
+                && s.TerminalAtUtc >= intervalSince, ct);
+
+        var stuckCount = await _context.SiteCalls
+            .CountAsync(s => s.TerminalAtUtc == null && s.CreatedAtUtc < stuckCutoff, ct);
+
+        // Nullable MIN: an empty non-terminal set yields null instead of throwing,
+        // so no separate Any() probe is needed (that probe cost a round trip and
+        // raced with rows turning terminal between the two statements).
+        var oldestCreatedAt = await _context.SiteCalls
+            .Where(s => s.TerminalAtUtc == null)
+            .MinAsync(s => (DateTime?)s.CreatedAtUtc, ct);
+
+        TimeSpan? oldestPendingAge = oldestCreatedAt is { } createdAt ? now - createdAt : null;
+
+        return new SiteCallKpiSnapshot(
+            BufferedCount: bufferedCount,
+            ParkedCount: parkedCount,
+            FailedLastInterval: failedLastInterval,
+            DeliveredLastInterval: deliveredLastInterval,
+            OldestPendingAge: oldestPendingAge,
+            StuckCount: stuckCount);
+    }
+
+    ///
+    /// Computes the per-source-site KPI breakdown. The five counts are
+    /// GROUP BY SourceSite aggregates; the oldest-pending age is a
+    /// per-site MIN(CreatedAtUtc) over the (bounded) non-terminal set —
+    /// all run server-side. A site appears in the result only if it has at
+    /// least one row matched by one of the count queries. "Buffered" / "stuck"
+    /// key off TerminalAtUtc IS NULL — see .
+ /// + public async Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) + { + var now = DateTime.UtcNow; + + var buffered = await CountBySiteAsync(s => s.TerminalAtUtc == null, ct); + + var parked = await CountBySiteAsync(s => s.Status == StatusParked, ct); + + var failed = await CountBySiteAsync( + s => s.Status == StatusFailed + && s.TerminalAtUtc != null && s.TerminalAtUtc >= intervalSince, ct); + + var delivered = await CountBySiteAsync( + s => s.Status == StatusDelivered + && s.TerminalAtUtc != null && s.TerminalAtUtc >= intervalSince, ct); + + var stuck = await CountBySiteAsync( + s => s.TerminalAtUtc == null && s.CreatedAtUtc < stuckCutoff, ct); + + // Oldest non-terminal CreatedAtUtc per site — a server-side GROUP BY MIN. + var oldest = (await _context.SiteCalls + .Where(s => s.TerminalAtUtc == null) + .GroupBy(s => s.SourceSite) + .Select(g => new { Site = g.Key, Oldest = g.Min(s => s.CreatedAtUtc) }) + .ToListAsync(ct)) + .ToDictionary(x => x.Site, x => x.Oldest); + + var siteIds = buffered.Keys + .Concat(parked.Keys).Concat(failed.Keys) + .Concat(delivered.Keys).Concat(stuck.Keys) + .Distinct() + .OrderBy(s => s, StringComparer.Ordinal); + + return siteIds.Select(site => new SiteCallSiteKpiSnapshot( + SourceSite: site, + BufferedCount: buffered.GetValueOrDefault(site), + ParkedCount: parked.GetValueOrDefault(site), + FailedLastInterval: failed.GetValueOrDefault(site), + DeliveredLastInterval: delivered.GetValueOrDefault(site), + OldestPendingAge: oldest.TryGetValue(site, out var createdAt) + ? now - createdAt + : null, + StuckCount: stuck.GetValueOrDefault(site))).ToList(); + } + + /// Counts SiteCalls rows matching , grouped by source site. 
+ private async Task> CountBySiteAsync( + System.Linq.Expressions.Expression> predicate, + CancellationToken ct) + { + return await _context.SiteCalls + .Where(predicate) + .GroupBy(s => s.SourceSite) + .Select(g => new { Site = g.Key, Count = g.Count() }) + .ToDictionaryAsync(x => x.Site, x => x.Count, ct); + } + private static int GetRankOrThrow(string status) { if (!StatusRank.TryGetValue(status, out var rank)) diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index ac9bb89..4708744 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -423,10 +423,13 @@ akka {{ // is a scoped EF Core service. var siteCallAuditLogger = _serviceProvider.GetRequiredService() .CreateLogger(); + var siteCallAuditOptions = _serviceProvider + .GetRequiredService>().Value; var siteCallAuditSingletonProps = ClusterSingletonManager.Props( singletonProps: Props.Create(() => new ScadaLink.SiteCallAudit.SiteCallAuditActor( _serviceProvider, + siteCallAuditOptions, siteCallAuditLogger)), terminationMessage: PoisonPill.Instance, settings: ClusterSingletonManagerSettings.Create(_actorSystem!) @@ -437,7 +440,12 @@ akka {{ singletonManagerPath: "/user/site-call-audit-singleton", settings: ClusterSingletonProxySettings.Create(_actorSystem) .WithSingletonName("site-call-audit")); - _actorSystem.ActorOf(siteCallAuditProxyProps, "site-call-audit-proxy"); + var siteCallAuditProxy = _actorSystem.ActorOf(siteCallAuditProxyProps, "site-call-audit-proxy"); + + // Hand the proxy to the CommunicationService so the Central UI can Ask + // the Site Call Audit actor directly (query, KPIs, detail) — mirrors the + // SetNotificationOutbox wiring above. + commService?.SetSiteCallAudit(siteCallAuditProxy); _logger.LogInformation("SiteCallAuditActor singleton created"); _logger.LogInformation("Central actors registered. 
CentralCommunicationActor created."); diff --git a/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj b/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj index d8b0e7a..7603dd6 100644 --- a/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj +++ b/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj @@ -13,6 +13,8 @@ + + diff --git a/src/ScadaLink.SiteCallAudit/ServiceCollectionExtensions.cs b/src/ScadaLink.SiteCallAudit/ServiceCollectionExtensions.cs index 6eb0d80..e764b68 100644 --- a/src/ScadaLink.SiteCallAudit/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.SiteCallAudit/ServiceCollectionExtensions.cs @@ -7,33 +7,34 @@ namespace ScadaLink.SiteCallAudit; /// /// /// -/// M3 Bundle C ships the ingest-only minimum surface (the actor itself); the -/// full DI surface — reconciliation puller, KPI projector, central→site -/// Retry/Discard relay, options + validators — is deferred to a follow-up. +/// Binds (stuck-call detection + KPI +/// windowing for the read-side query/KPI handlers). The reconciliation puller +/// and central→site Retry/Discard relay are still deferred to later follow-ups. /// /// /// The repository (ISiteCallAuditRepository) is registered by /// ScadaLink.ConfigurationDatabase.ServiceCollectionExtensions.AddConfigurationDatabase, /// so callers (the Host on the central node) must also call that. The actor's -/// Props are wired up in Host registration (Bundle F); this extension -/// is currently a no-op placeholder kept for symmetry with the AuditLog and -/// NotificationOutbox composition roots — adding it now means consumers can -/// reference the method without re-touching the Host project later. +/// Props are wired up in Host registration. /// /// public static class ServiceCollectionExtensions { + /// Configuration section bound to . + public const string OptionsSection = "ScadaLink:SiteCallAudit"; + /// - /// Registers Site Call Audit (#22) services. 
Currently a no-op - /// placeholder — Bundle F will populate this with the actor's Props - /// factory + options bindings. The method is exposed now so the Host - /// wiring call already exists at the API boundary. + /// Registers Site Call Audit (#22) services: the + /// binding consumed by the actor's read-side KPI/query handlers. The actor's + /// Props are still constructed inline in Host wiring. /// public static IServiceCollection AddSiteCallAudit(this IServiceCollection services) { ArgumentNullException.ThrowIfNull(services); - // Actor props are constructed in Host wiring (Bundle F). This - // extension is a placeholder for future config + DI. + + services.AddOptions() + .BindConfiguration(OptionsSection); + return services; } } diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs index 7506681..a6537dd 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs @@ -1,8 +1,11 @@ using Akka.Actor; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Interfaces.Repositories; using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Types; +using ScadaLink.Commons.Types.Audit; namespace ScadaLink.SiteCallAudit; @@ -42,26 +45,34 @@ namespace ScadaLink.SiteCallAudit; /// public class SiteCallAuditActor : ReceiveActor { + /// Maximum page size honoured by a . + private const int MaxPageSize = 200; + private readonly IServiceProvider? _serviceProvider; private readonly ISiteCallAuditRepository? _injectedRepository; + private readonly SiteCallAuditOptions _options; private readonly ILogger _logger; /// /// Test-mode constructor — injects a concrete repository instance whose /// lifetime exceeds the test, so the actor reuses the same instance /// across every message. Used by Bundle C's MSSQL-backed TestKit fixture. 
+ /// An optional lets a test pin the stuck/KPI + /// windows; when omitted the production defaults apply. /// public SiteCallAuditActor( ISiteCallAuditRepository repository, - ILogger logger) + ILogger logger, + SiteCallAuditOptions? options = null) { ArgumentNullException.ThrowIfNull(repository); ArgumentNullException.ThrowIfNull(logger); _injectedRepository = repository; _logger = logger; + _options = options ?? new SiteCallAuditOptions(); - ReceiveAsync(OnUpsertAsync); + RegisterHandlers(); } /// @@ -73,15 +84,33 @@ public class SiteCallAuditActor : ReceiveActor /// public SiteCallAuditActor( IServiceProvider serviceProvider, + SiteCallAuditOptions options, ILogger logger) { ArgumentNullException.ThrowIfNull(serviceProvider); + ArgumentNullException.ThrowIfNull(options); ArgumentNullException.ThrowIfNull(logger); _serviceProvider = serviceProvider; + _options = options; _logger = logger; + RegisterHandlers(); + } + + /// + /// Wires up the message handlers shared by both constructors: the M3 + /// ingest path plus the Task 4 read-side (query, detail, global + per-site + /// KPI). All read handlers reply to an Ask, so they capture Sender + /// before the first await and PipeTo the result back. + /// + private void RegisterHandlers() + { ReceiveAsync(OnUpsertAsync); + Receive(HandleQuery); + Receive(HandleDetail); + Receive(HandleKpi); + Receive(HandlePerSiteKpi); } /// @@ -137,4 +166,305 @@ public class SiteCallAuditActor : ReceiveActor scope?.Dispose(); } } + + // ── Task 4: read-side (query / detail / KPI) ── + + /// + /// Handles a paginated, filtered query over the SiteCalls table. + /// Builds a + + /// keyset cursor from the request, runs the query on a scoped repository, + /// and pipes the mapped response back to the captured sender. A repository + /// fault yields a failure response with an empty list. 
+ /// + private void HandleQuery(SiteCallQueryRequest request) + { + var sender = Sender; + var now = DateTime.UtcNow; + + QueryAsync(request, now).PipeTo( + sender, + success: response => response, + failure: ex => new SiteCallQueryResponse( + request.CorrelationId, + Success: false, + ErrorMessage: ex.GetBaseException().Message, + SiteCalls: Array.Empty(), + NextAfterCreatedAtUtc: null, + NextAfterId: null)); + } + + private async Task QueryAsync(SiteCallQueryRequest request, DateTime now) + { + var filter = new SiteCallQueryFilter( + Channel: NullIfBlank(request.ChannelFilter), + SourceSite: NullIfBlank(request.SourceSiteFilter), + Status: NullIfBlank(request.StatusFilter), + Target: NullIfBlank(request.TargetKeyword), + FromUtc: request.FromUtc, + ToUtc: request.ToUtc); + + var pageSize = Math.Clamp(request.PageSize, 1, MaxPageSize); + var paging = new SiteCallPaging( + PageSize: pageSize, + AfterCreatedAtUtc: request.AfterCreatedAtUtc, + AfterId: request.AfterId is { } id ? new TrackedOperationId(id) : null); + + var (scope, repository) = ResolveRepository(); + try + { + var rows = await repository.QueryAsync(filter, paging).ConfigureAwait(false); + + var stuckCutoff = now - _options.StuckAgeThreshold; + var summaries = rows + // StuckOnly is post-filtered here rather than pushed into the + // repository SQL — the SiteCallQueryFilter has no stuck predicate + // and a status-aware created-before clause does not compose with + // the keyset cursor. The page may therefore return fewer than + // PageSize rows when StuckOnly is set; that is acceptable for a + // display-only filter. + .Where(row => !request.StuckOnly || IsStuck(row, stuckCutoff)) + .Select(row => ToSummary(row, stuckCutoff)) + .ToList(); + + // The next-page cursor is the LAST row of the materialised page — + // before StuckOnly post-filtering, so paging still advances even + // when every row on a page was filtered out. + var cursorRow = rows.Count > 0 ? 
rows[^1] : null; + + return new SiteCallQueryResponse( + request.CorrelationId, + Success: true, + ErrorMessage: null, + SiteCalls: summaries, + NextAfterCreatedAtUtc: cursorRow?.CreatedAtUtc, + NextAfterId: cursorRow?.TrackedOperationId.Value); + } + finally + { + scope?.Dispose(); + } + } + + /// + /// Handles a full-detail query for a single cached call — backs the report + /// detail modal. A missing row yields Success=false with a "not + /// found" message; a repository fault yields Success=false with the + /// fault message. + /// + private void HandleDetail(SiteCallDetailRequest request) + { + var sender = Sender; + + DetailAsync(request).PipeTo( + sender, + success: response => response, + failure: ex => new SiteCallDetailResponse( + request.CorrelationId, + Success: false, + ErrorMessage: ex.GetBaseException().Message, + Detail: null)); + } + + private async Task DetailAsync(SiteCallDetailRequest request) + { + var (scope, repository) = ResolveRepository(); + try + { + var row = await repository + .GetAsync(new TrackedOperationId(request.TrackedOperationId)) + .ConfigureAwait(false); + + if (row is null) + { + return new SiteCallDetailResponse( + request.CorrelationId, + Success: false, + ErrorMessage: "site call not found", + Detail: null); + } + + return new SiteCallDetailResponse( + request.CorrelationId, + Success: true, + ErrorMessage: null, + Detail: ToDetail(row)); + } + finally + { + scope?.Dispose(); + } + } + + /// + /// Handles a global KPI snapshot request, deriving the stuck cutoff from + /// and the + /// failed/delivered interval bound from . 
+ /// + private void HandleKpi(SiteCallKpiRequest request) + { + var sender = Sender; + var now = DateTime.UtcNow; + var stuckCutoff = now - _options.StuckAgeThreshold; + var intervalSince = now - _options.KpiInterval; + + KpiAsync(request.CorrelationId, stuckCutoff, intervalSince).PipeTo( + sender, + success: response => response, + failure: ex => new SiteCallKpiResponse( + request.CorrelationId, + Success: false, + ErrorMessage: ex.GetBaseException().Message, + BufferedCount: 0, + ParkedCount: 0, + FailedLastInterval: 0, + DeliveredLastInterval: 0, + OldestPendingAge: null, + StuckCount: 0)); + } + + private async Task KpiAsync( + string correlationId, DateTime stuckCutoff, DateTime intervalSince) + { + var (scope, repository) = ResolveRepository(); + try + { + var snapshot = await repository + .ComputeKpisAsync(stuckCutoff, intervalSince) + .ConfigureAwait(false); + + return new SiteCallKpiResponse( + correlationId, + Success: true, + ErrorMessage: null, + snapshot.BufferedCount, + snapshot.ParkedCount, + snapshot.FailedLastInterval, + snapshot.DeliveredLastInterval, + snapshot.OldestPendingAge, + snapshot.StuckCount); + } + finally + { + scope?.Dispose(); + } + } + + /// + /// Handles a per-source-site KPI request, using the same stuck cutoff and + /// interval bound as . 
+ /// + private void HandlePerSiteKpi(PerSiteSiteCallKpiRequest request) + { + var sender = Sender; + var now = DateTime.UtcNow; + var stuckCutoff = now - _options.StuckAgeThreshold; + var intervalSince = now - _options.KpiInterval; + + PerSiteKpiAsync(request.CorrelationId, stuckCutoff, intervalSince).PipeTo( + sender, + success: response => response, + failure: ex => new PerSiteSiteCallKpiResponse( + request.CorrelationId, + Success: false, + ErrorMessage: ex.GetBaseException().Message, + Sites: Array.Empty())); + } + + private async Task PerSiteKpiAsync( + string correlationId, DateTime stuckCutoff, DateTime intervalSince) + { + var (scope, repository) = ResolveRepository(); + try + { + var sites = await repository + .ComputePerSiteKpisAsync(stuckCutoff, intervalSince) + .ConfigureAwait(false); + + return new PerSiteSiteCallKpiResponse( + correlationId, Success: true, ErrorMessage: null, sites); + } + finally + { + scope?.Dispose(); + } + } + + /// + /// Resolves an for one read message. + /// In test mode the injected instance is returned with a null scope; in + /// production a fresh DI scope is created and returned so the caller can + /// dispose it once the read completes — the same scope-per-message pattern + /// as . + /// + private (IServiceScope? Scope, ISiteCallAuditRepository Repository) ResolveRepository() + { + if (_injectedRepository is not null) + { + return (null, _injectedRepository); + } + + var scope = _serviceProvider!.CreateScope(); + return (scope, scope.ServiceProvider.GetRequiredService()); + } + + /// + /// A cached call counts as stuck when it is still non-terminal and was + /// created before . Non-terminal is keyed off + /// being null — the + /// SiteCalls operational mirror stores AuditStatus-derived + /// status strings (Attempted/Delivered/Parked/...), not + /// the tracking-lifecycle Pending/Retrying names the spec's + /// KPI section uses, so there is no status string that means "buffered". 
+ /// TerminalAtUtc is the entity's own active/terminal discriminator + /// and is consistent with the repository KPI counts and + /// PurgeTerminalAsync. + /// + private static bool IsStuck(SiteCall row, DateTime stuckCutoff) + { + return row.TerminalAtUtc is null && row.CreatedAtUtc < stuckCutoff; + } + + private static SiteCallSummary ToSummary(SiteCall row, DateTime stuckCutoff) + { + return new SiteCallSummary( + TrackedOperationId: row.TrackedOperationId.Value, + SourceSite: row.SourceSite, + Channel: row.Channel, + Target: row.Target, + Status: row.Status, + RetryCount: row.RetryCount, + LastError: row.LastError, + HttpStatus: row.HttpStatus, + CreatedAtUtc: row.CreatedAtUtc, + UpdatedAtUtc: row.UpdatedAtUtc, + TerminalAtUtc: row.TerminalAtUtc, + IsStuck: IsStuck(row, stuckCutoff)); + } + + private static SiteCallDetail ToDetail(SiteCall row) + { + return new SiteCallDetail( + TrackedOperationId: row.TrackedOperationId.Value, + SourceSite: row.SourceSite, + Channel: row.Channel, + Target: row.Target, + Status: row.Status, + RetryCount: row.RetryCount, + LastError: row.LastError, + HttpStatus: row.HttpStatus, + CreatedAtUtc: row.CreatedAtUtc, + UpdatedAtUtc: row.UpdatedAtUtc, + TerminalAtUtc: row.TerminalAtUtc, + IngestedAtUtc: row.IngestedAtUtc); + } + + /// + /// Treats an empty/whitespace filter string as "no constraint" — the + /// repository's interprets null as + /// a no-op predicate, so a blank UI filter must collapse to null. + /// + private static string? NullIfBlank(string? value) + { + return string.IsNullOrWhiteSpace(value) ? null : value; + } } diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs new file mode 100644 index 0000000..572fec6 --- /dev/null +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs @@ -0,0 +1,26 @@ +namespace ScadaLink.SiteCallAudit; + +/// +/// Configuration options for the Site Call Audit (#22) read-side: stuck-call +/// detection and KPI windowing. 
Mirrors the KPI-relevant subset of +/// NotificationOutboxOptions — the reconciliation, purge and dispatch +/// cadence options the Notification Outbox carries are not part of the Site +/// Call Audit read-side backend and are deliberately omitted here. +/// +public class SiteCallAuditOptions +{ + /// + /// Age past which a non-terminal cached call (Pending/Retrying) + /// is considered stuck. Display-only — surfaced as the Stuck KPI and a row + /// badge, with no escalation. Default 10 minutes, matching + /// NotificationOutboxOptions.StuckAgeThreshold. + /// + public TimeSpan StuckAgeThreshold { get; set; } = TimeSpan.FromMinutes(10); + + /// + /// Trailing window used to compute the delivered- and failed-last-interval + /// throughput KPIs. Default 1 minute, matching + /// NotificationOutboxOptions.DeliveredKpiWindow. + /// + public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1); +} diff --git a/tests/ScadaLink.AuditLog.Tests/Central/AuditLogIngestActorCombinedTelemetryTests.cs b/tests/ScadaLink.AuditLog.Tests/Central/AuditLogIngestActorCombinedTelemetryTests.cs index f61d1cd..7e3d1ea 100644 --- a/tests/ScadaLink.AuditLog.Tests/Central/AuditLogIngestActorCombinedTelemetryTests.cs +++ b/tests/ScadaLink.AuditLog.Tests/Central/AuditLogIngestActorCombinedTelemetryTests.cs @@ -356,6 +356,12 @@ public class AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture< _inner.QueryAsync(filter, paging, ct); public Task PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default) => _inner.PurgeTerminalAsync(olderThanUtc, ct); + public Task ComputeKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputeKpisAsync(stuckCutoff, intervalSince, ct); + public Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct); } /// @@ -387,5 +393,11 @@ public class 
AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture< _inner.QueryAsync(filter, paging, ct); public Task PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default) => _inner.PurgeTerminalAsync(olderThanUtc, ct); + public Task ComputeKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputeKpisAsync(stuckCutoff, intervalSince, ct); + public Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct); } } diff --git a/tests/ScadaLink.Commons.Tests/Messages/SiteCallQueriesTests.cs b/tests/ScadaLink.Commons.Tests/Messages/SiteCallQueriesTests.cs new file mode 100644 index 0000000..a702e14 --- /dev/null +++ b/tests/ScadaLink.Commons.Tests/Messages/SiteCallQueriesTests.cs @@ -0,0 +1,128 @@ +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Types.Audit; + +namespace ScadaLink.Commons.Tests.Messages; + +/// +/// Site Call Audit (#22): construction, value-equality and optionality tests +/// for the Site Calls UI query / KPI / detail message contracts. Mirrors the +/// Notification Outbox NotificationMessagesTests coverage of the read +/// side, scoped to the contracts the Site Calls page consumes. 
+/// +public class SiteCallQueriesTests +{ + [Fact] + public void SiteCallQueryRequest_PositionalConstruction_SetsAllFields() + { + var afterCreated = DateTime.UtcNow; + var afterId = Guid.NewGuid(); + var request = new SiteCallQueryRequest( + "corr-1", "Parked", "plant-a", "ApiOutbound", "ERP.GetOrder", true, + new DateTime(2026, 5, 1), new DateTime(2026, 5, 20), afterCreated, afterId, 50); + + Assert.Equal("corr-1", request.CorrelationId); + Assert.Equal("Parked", request.StatusFilter); + Assert.Equal("plant-a", request.SourceSiteFilter); + Assert.Equal("ApiOutbound", request.ChannelFilter); + Assert.Equal("ERP.GetOrder", request.TargetKeyword); + Assert.True(request.StuckOnly); + Assert.Equal(new DateTime(2026, 5, 1), request.FromUtc); + Assert.Equal(new DateTime(2026, 5, 20), request.ToUtc); + Assert.Equal(afterCreated, request.AfterCreatedAtUtc); + Assert.Equal(afterId, request.AfterId); + Assert.Equal(50, request.PageSize); + } + + [Fact] + public void SiteCallQueryRequest_AllowsNullOptionalFilters() + { + var request = new SiteCallQueryRequest( + "corr-2", null, null, null, null, false, null, null, null, null, 25); + + Assert.Null(request.StatusFilter); + Assert.Null(request.SourceSiteFilter); + Assert.Null(request.ChannelFilter); + Assert.Null(request.TargetKeyword); + Assert.False(request.StuckOnly); + Assert.Null(request.FromUtc); + Assert.Null(request.AfterId); + } + + [Fact] + public void SiteCallQueryResponse_ValueEquality_EqualWhenAllFieldsMatch() + { + var a = new SiteCallQueryResponse("c", true, null, Array.Empty(), null, null); + var b = new SiteCallQueryResponse("c", true, null, Array.Empty(), null, null); + + Assert.Equal(a, b); + Assert.Equal(a.GetHashCode(), b.GetHashCode()); + } + + [Fact] + public void SiteCallSummary_CarriesEntityColumnsAndStuckFlag() + { + var id = Guid.NewGuid(); + var created = DateTime.UtcNow.AddMinutes(-30); + var summary = new SiteCallSummary( + id, "plant-a", "DbOutbound", "InventoryDb", "Retrying", 3, + "transient 
503", 503, created, created.AddMinutes(1), null, IsStuck: true); + + Assert.Equal(id, summary.TrackedOperationId); + Assert.Equal("DbOutbound", summary.Channel); + Assert.Equal("InventoryDb", summary.Target); + Assert.Equal("Retrying", summary.Status); + Assert.Equal(3, summary.RetryCount); + Assert.Equal(503, summary.HttpStatus); + Assert.Null(summary.TerminalAtUtc); + Assert.True(summary.IsStuck); + } + + [Fact] + public void SiteCallDetailResponse_MissingRow_HasNullDetail() + { + var response = new SiteCallDetailResponse("c", false, "site call not found", null); + + Assert.False(response.Success); + Assert.Null(response.Detail); + Assert.Equal("site call not found", response.ErrorMessage); + } + + [Fact] + public void SiteCallKpiResponse_FailureShape_ZeroesKpiFields() + { + var response = new SiteCallKpiResponse( + "c", Success: false, ErrorMessage: "db down", + BufferedCount: 0, ParkedCount: 0, FailedLastInterval: 0, + DeliveredLastInterval: 0, OldestPendingAge: null, StuckCount: 0); + + Assert.False(response.Success); + Assert.Equal("db down", response.ErrorMessage); + Assert.Equal(0, response.BufferedCount); + Assert.Null(response.OldestPendingAge); + } + + [Fact] + public void PerSiteSiteCallKpiResponse_CarriesPerSiteSnapshots() + { + var response = new PerSiteSiteCallKpiResponse( + "c", true, null, + new[] + { + new SiteCallSiteKpiSnapshot("plant-a", 4, 1, 0, 9, TimeSpan.FromMinutes(15), 2), + }); + + Assert.True(response.Success); + var site = Assert.Single(response.Sites); + Assert.Equal("plant-a", site.SourceSite); + Assert.Equal(4, site.BufferedCount); + Assert.Equal(2, site.StuckCount); + Assert.Equal(TimeSpan.FromMinutes(15), site.OldestPendingAge); + } + + [Fact] + public void SiteCallKpiSnapshot_OldestPendingAge_IsNullableForEmptyTable() + { + var snapshot = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0); + Assert.Null(snapshot.OldestPendingAge); + } +} diff --git a/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs 
b/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs index 7ac43dc..b1a1de8 100644 --- a/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs +++ b/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs @@ -2,8 +2,10 @@ using Akka.Actor; using Akka.TestKit.Xunit2; using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; +using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Messages.Deployment; using ScadaLink.Commons.Messages.Notification; +using ScadaLink.Commons.Types.Audit; using ScadaLink.Commons.Types.Notifications; namespace ScadaLink.Communication.Tests; @@ -236,6 +238,150 @@ public class CommunicationServiceTests : TestKit Assert.Equal("plant-a", result.Sites[0].SourceSiteId); } + // ── Site Call Audit: central-side audit actor calls ── + + [Fact] + public async Task QuerySiteCallsAsync_BeforeSiteCallAuditSet_Throws() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + + await Assert.ThrowsAsync(() => + service.QuerySiteCallsAsync(new SiteCallQueryRequest( + "corr-1", null, null, null, null, false, null, null, null, null, 50))); + } + + [Fact] + public async Task GetSiteCallKpisAsync_BeforeSiteCallAuditSet_Throws() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + + await Assert.ThrowsAsync(() => + service.GetSiteCallKpisAsync(new SiteCallKpiRequest("corr-1"))); + } + + [Fact] + public async Task GetSiteCallDetailAsync_BeforeSiteCallAuditSet_Throws() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + + await Assert.ThrowsAsync(() => + service.GetSiteCallDetailAsync(new SiteCallDetailRequest("corr-1", Guid.NewGuid()))); + } + + [Fact] + public async Task GetPerSiteSiteCallKpisAsync_BeforeSiteCallAuditSet_Throws() + { + var service = new CommunicationService( + Options.Create(new 
CommunicationOptions()), + NullLogger.Instance); + + await Assert.ThrowsAsync(() => + service.GetPerSiteSiteCallKpisAsync(new PerSiteSiteCallKpiRequest("corr-1"))); + } + + [Fact] + public async Task QuerySiteCallsAsync_AsksSiteCallAuditProxyDirectly() + { + // The Site Call Audit actor is central-local: the request must be Asked + // directly to its proxy (no SiteEnvelope wrapping). + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new SiteCallQueryRequest( + "corr-q", "Parked", "plant-a", "ApiOutbound", "ERP.GetOrder", true, + null, null, null, null, 25); + var task = service.QuerySiteCallsAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new SiteCallQueryResponse( + "corr-q", true, null, Array.Empty(), null, null); + probe.Reply(reply); + + Assert.Same(reply, await task); + } + + [Fact] + public async Task GetSiteCallDetailAsync_AsksSiteCallAuditProxyDirectly() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new SiteCallDetailRequest("corr-d", Guid.NewGuid()); + var task = service.GetSiteCallDetailAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new SiteCallDetailResponse("corr-d", false, "site call not found", null); + probe.Reply(reply); + + var result = await task; + Assert.Same(reply, result); + Assert.False(result.Success); + } + + [Fact] + public async Task GetSiteCallKpisAsync_AsksSiteCallAuditProxyDirectly() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new SiteCallKpiRequest("corr-k"); + 
var task = service.GetSiteCallKpisAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new SiteCallKpiResponse( + "corr-k", true, null, 4, 1, 2, 9, TimeSpan.FromMinutes(7), 1); + probe.Reply(reply); + + var result = await task; + Assert.Same(reply, result); + Assert.Equal(4, result.BufferedCount); + Assert.Equal(1, result.StuckCount); + } + + [Fact] + public async Task GetPerSiteSiteCallKpisAsync_AsksSiteCallAuditProxyDirectly() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new PerSiteSiteCallKpiRequest("corr-ps"); + var task = service.GetPerSiteSiteCallKpisAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new PerSiteSiteCallKpiResponse( + "corr-ps", true, null, + new[] { new SiteCallSiteKpiSnapshot("plant-a", 3, 0, 0, 5, null, 0) }); + probe.Reply(reply); + + var result = await task; + Assert.Same(reply, result); + Assert.True(result.Success); + Assert.Single(result.Sites); + Assert.Equal("plant-a", result.Sites[0].SourceSite); + } + /// /// Stand-in for CentralCommunicationActor: verifies the message is wrapped /// in a SiteEnvelope targeting the requested site and replies with a typed diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs index ae5dd90..1156420 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs @@ -338,6 +338,104 @@ public class SiteCallAuditRepositoryTests : IClassFixture Assert.NotNull(await repo.GetAsync(recentTerminalId)); } + // --- KPI snapshot tests ------------------------------------------------- + + 
[SkippableFact] + public async Task ComputeKpisAsync_CountsBufferedParkedFailedDeliveredAndStuck() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var site = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + + var now = DateTime.UtcNow; + var stuckCutoff = now.AddMinutes(-10); + var intervalSince = now.AddHours(-1); + + // Buffered + stuck (non-terminal Attempted, created 30 min ago). + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Attempted", createdAtUtc: now.AddMinutes(-30))); + // Buffered but NOT stuck (non-terminal Attempted, created 2 min ago). + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Attempted", createdAtUtc: now.AddMinutes(-2))); + // Parked (terminal). + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Parked", + createdAtUtc: now.AddMinutes(-5), updatedAtUtc: now.AddMinutes(-4), + terminal: true, terminalAtUtc: now.AddMinutes(-4))); + // Delivered within the interval. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Delivered", + createdAtUtc: now.AddMinutes(-4), updatedAtUtc: now.AddMinutes(-1), + terminal: true, terminalAtUtc: now.AddMinutes(-1))); + // Failed within the interval. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Failed", + createdAtUtc: now.AddMinutes(-6), updatedAtUtc: now.AddMinutes(-2), + terminal: true, terminalAtUtc: now.AddMinutes(-2))); + // Delivered OUTSIDE the interval (2 hours ago) — must not count. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), site, status: "Delivered", + createdAtUtc: now.AddHours(-3), updatedAtUtc: now.AddHours(-2), + terminal: true, terminalAtUtc: now.AddHours(-2))); + + var snapshot = await repo.ComputeKpisAsync(stuckCutoff, intervalSince); + + // Counts are global; assert the floor since the table is shared with + // other tests. 
The OUTSIDE-interval Delivered row proves the window + // bounds the throughput counts. + Assert.True(snapshot.BufferedCount >= 2); + Assert.True(snapshot.ParkedCount >= 1); + Assert.True(snapshot.StuckCount >= 1); + Assert.True(snapshot.DeliveredLastInterval >= 1); + Assert.True(snapshot.FailedLastInterval >= 1); + Assert.NotNull(snapshot.OldestPendingAge); + Assert.True(snapshot.OldestPendingAge >= TimeSpan.FromMinutes(25)); + } + + [SkippableFact] + public async Task ComputePerSiteKpisAsync_ScopesCountsToEachSite() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteA = NewSiteId(); + var siteB = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + + var now = DateTime.UtcNow; + var stuckCutoff = now.AddMinutes(-10); + var intervalSince = now.AddHours(-1); + + // siteA: 2 buffered (one stuck), 1 parked. + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteA, status: "Attempted", createdAtUtc: now.AddMinutes(-30))); + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteA, status: "Attempted", createdAtUtc: now.AddMinutes(-2))); + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteA, status: "Parked", + createdAtUtc: now.AddMinutes(-5), updatedAtUtc: now.AddMinutes(-4), + terminal: true, terminalAtUtc: now.AddMinutes(-4))); + // siteB: 1 delivered within interval only. 
+ await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteB, status: "Delivered", + createdAtUtc: now.AddMinutes(-4), updatedAtUtc: now.AddMinutes(-1), + terminal: true, terminalAtUtc: now.AddMinutes(-1))); + + var perSite = await repo.ComputePerSiteKpisAsync(stuckCutoff, intervalSince); + + var a = Assert.Single(perSite, s => s.SourceSite == siteA); + Assert.Equal(2, a.BufferedCount); + Assert.Equal(1, a.ParkedCount); + Assert.Equal(1, a.StuckCount); + Assert.NotNull(a.OldestPendingAge); + + var b = Assert.Single(perSite, s => s.SourceSite == siteB); + Assert.Equal(0, b.BufferedCount); + Assert.Equal(1, b.DeliveredLastInterval); + // siteB has no non-terminal rows — no oldest-pending age. + Assert.Null(b.OldestPendingAge); + } + // --- helpers ------------------------------------------------------------ private ScadaLinkDbContext CreateContext() diff --git a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs index e9ef807..73f3ead 100644 --- a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs +++ b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs @@ -70,10 +70,12 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture + private IActorRef CreateActor( + ISiteCallAuditRepository repository, SiteCallAuditOptions? options = null) => Sys.ActorOf(Props.Create(() => new SiteCallAuditActor( repository, - NullLogger.Instance))); + NullLogger.Instance, + options))); [SkippableFact] public async Task Receive_UpsertSiteCallCommand_Persists_Replies_Accepted() @@ -182,6 +184,291 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture(TimeSpan.FromSeconds(10)); + Assert.True(response.Success); + Assert.Equal("corr-q1", response.CorrelationId); + Assert.Equal(2, response.SiteCalls.Count); + Assert.All(response.SiteCalls, s => Assert.Equal(siteId, s.SourceSite)); + // Newest first — ordered (CreatedAtUtc DESC). 
+ Assert.Equal("Delivered", response.SiteCalls[0].Status); + // Cursor echoes the last (oldest) row of the page. + Assert.Equal(t0, response.NextAfterCreatedAtUtc); + Assert.Equal(response.SiteCalls[^1].TrackedOperationId, response.NextAfterId); + } + + [SkippableFact] + public async Task SiteCallQueryRequest_KeysetPaging_AdvancesViaCursor() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + var actor = CreateActor(repo); + + var t0 = new DateTime(2026, 5, 20, 13, 0, 0, DateTimeKind.Utc); + for (var i = 0; i < 3; i++) + { + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteId, createdAtUtc: t0.AddMinutes(i))); + } + + actor.Tell( + new SiteCallQueryRequest( + "corr-q2", null, siteId, null, null, false, null, null, null, null, PageSize: 2), + TestActor); + var page1 = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.Equal(2, page1.SiteCalls.Count); + + actor.Tell( + new SiteCallQueryRequest( + "corr-q3", null, siteId, null, null, false, null, null, + page1.NextAfterCreatedAtUtc, page1.NextAfterId, PageSize: 2), + TestActor); + var page2 = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.Single(page2.SiteCalls); + + // No overlap across the two pages. + var allIds = page1.SiteCalls.Concat(page2.SiteCalls) + .Select(s => s.TrackedOperationId).ToHashSet(); + Assert.Equal(3, allIds.Count); + } + + [SkippableFact] + public async Task SiteCallQueryRequest_StuckOnly_ReturnsOnlyOldNonTerminalRows() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + // 10-minute stuck threshold (the production default). 
+ var actor = CreateActor(repo, new SiteCallAuditOptions { StuckAgeThreshold = TimeSpan.FromMinutes(10) }); + + var now = DateTime.UtcNow; + // Stuck: non-terminal (Attempted, TerminalAtUtc null), created 30 min ago. + var stuckId = TrackedOperationId.New(); + await repo.UpsertAsync(NewRow(stuckId, siteId, status: "Attempted", createdAtUtc: now.AddMinutes(-30))); + // Not stuck: non-terminal but recent. + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteId, status: "Attempted", createdAtUtc: now.AddMinutes(-2))); + // Not stuck: old but terminal (Delivered, TerminalAtUtc set). + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteId, status: "Delivered", + createdAtUtc: now.AddMinutes(-40), terminal: true)); + + actor.Tell( + new SiteCallQueryRequest( + "corr-stuck", null, siteId, null, null, StuckOnly: true, + null, null, null, null, PageSize: 50), + TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(response.Success); + Assert.Single(response.SiteCalls); + Assert.Equal(stuckId.Value, response.SiteCalls[0].TrackedOperationId); + Assert.True(response.SiteCalls[0].IsStuck); + } + + [SkippableFact] + public async Task SiteCallDetailRequest_KnownId_ReturnsFullDetail() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + var id = TrackedOperationId.New(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + var actor = CreateActor(repo); + + await repo.UpsertAsync(NewRow(id, siteId, status: "Attempted", retryCount: 2, lastError: "503")); + + actor.Tell(new SiteCallDetailRequest("corr-d1", id.Value), TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(response.Success); + Assert.NotNull(response.Detail); + Assert.Equal(id.Value, response.Detail!.TrackedOperationId); + Assert.Equal("Attempted", response.Detail.Status); + Assert.Equal(2, response.Detail.RetryCount); + Assert.Equal("503", 
response.Detail.LastError); + Assert.Equal(siteId, response.Detail.SourceSite); + } + + [SkippableFact] + public async Task SiteCallDetailRequest_UnknownId_RepliesNotFound() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + var actor = CreateActor(repo); + + actor.Tell(new SiteCallDetailRequest("corr-d2", Guid.NewGuid()), TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.False(response.Success); + Assert.Null(response.Detail); + Assert.NotNull(response.ErrorMessage); + } + + [SkippableFact] + public async Task SiteCallKpiRequest_ComputesPointInTimeCounts() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + var actor = CreateActor(repo, new SiteCallAuditOptions + { + StuckAgeThreshold = TimeSpan.FromMinutes(10), + KpiInterval = TimeSpan.FromHours(1), + }); + + var now = DateTime.UtcNow; + // Buffered (non-terminal Attempted) + stuck (created 30 min ago). + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteId, status: "Attempted", createdAtUtc: now.AddMinutes(-30))); + // Buffered (non-terminal Attempted), not stuck. + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteId, status: "Attempted", createdAtUtc: now.AddMinutes(-2))); + // Parked (terminal). + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteId, status: "Parked", + createdAtUtc: now.AddMinutes(-5), terminal: true)); + // Delivered within the interval. 
+ await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteId, status: "Delivered", + createdAtUtc: now.AddMinutes(-3), updatedAtUtc: now.AddMinutes(-1), terminal: true)); + + actor.Tell(new SiteCallKpiRequest("corr-kpi"), TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(response.Success); + // Per-site rows are isolated by the unique siteId — but KPIs are global, + // so assert the floor (>=) rather than exact counts: other tests' rows + // may share the table. + Assert.True(response.BufferedCount >= 2); + Assert.True(response.ParkedCount >= 1); + Assert.True(response.DeliveredLastInterval >= 1); + Assert.True(response.StuckCount >= 1); + Assert.NotNull(response.OldestPendingAge); + } + + [SkippableFact] + public async Task PerSiteSiteCallKpiRequest_ScopesCountsToEachSite() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + var actor = CreateActor(repo, new SiteCallAuditOptions + { + StuckAgeThreshold = TimeSpan.FromMinutes(10), + KpiInterval = TimeSpan.FromHours(1), + }); + + var now = DateTime.UtcNow; + // Non-terminal Attempted, created 30 min ago — buffered + stuck. + await repo.UpsertAsync(NewRow(TrackedOperationId.New(), siteId, status: "Attempted", createdAtUtc: now.AddMinutes(-30))); + // Terminal Parked. 
+ await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), siteId, status: "Parked", + createdAtUtc: now.AddMinutes(-5), terminal: true)); + + actor.Tell(new PerSiteSiteCallKpiRequest("corr-psk"), TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(response.Success); + + var mySite = Assert.Single(response.Sites, s => s.SourceSite == siteId); + Assert.Equal(1, mySite.BufferedCount); + Assert.Equal(1, mySite.ParkedCount); + Assert.Equal(1, mySite.StuckCount); + Assert.NotNull(mySite.OldestPendingAge); + } + + [SkippableFact] + public async Task SiteCallQueryRequest_RepoThrows_RepliesFailure_ActorStaysAlive() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var siteId = NewSiteId(); + await using var context = CreateContext(); + var realRepo = new SiteCallAuditRepository(context); + var actor = CreateActor(new QueryThrowingRepository(realRepo)); + + actor.Tell( + new SiteCallQueryRequest( + "corr-fault", null, siteId, null, null, false, null, null, null, null, 50), + TestActor); + + var response = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.False(response.Success); + Assert.Empty(response.SiteCalls); + Assert.NotNull(response.ErrorMessage); + Assert.Equal("corr-fault", response.CorrelationId); + } + + /// + /// Test double whose always + /// throws — used to verify the query handler's failure projection produces a + /// Success=false response without crashing the actor. 
+ /// + private sealed class QueryThrowingRepository : ISiteCallAuditRepository + { + private readonly ISiteCallAuditRepository _inner; + + public QueryThrowingRepository(ISiteCallAuditRepository inner) + { + _inner = inner; + } + + public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default) => + _inner.UpsertAsync(siteCall, ct); + + public Task GetAsync(TrackedOperationId id, CancellationToken ct = default) => + _inner.GetAsync(id, ct); + + public Task> QueryAsync( + SiteCallQueryFilter filter, SiteCallPaging paging, CancellationToken ct = default) => + throw new InvalidOperationException("simulated query failure"); + + public Task PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default) => + _inner.PurgeTerminalAsync(olderThanUtc, ct); + + public Task ComputeKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputeKpisAsync(stuckCutoff, intervalSince, ct); + + public Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct); + } + /// /// Tiny test double that delegates to a real repository but throws on a /// specified . 
Used to verify the actor's @@ -217,5 +504,13 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default) => _inner.PurgeTerminalAsync(olderThanUtc, ct); + + public Task ComputeKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputeKpisAsync(stuckCutoff, intervalSince, ct); + + public Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct); } } diff --git a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditOptionsTests.cs b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditOptionsTests.cs new file mode 100644 index 0000000..6778dc1 --- /dev/null +++ b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditOptionsTests.cs @@ -0,0 +1,15 @@ +namespace ScadaLink.SiteCallAudit.Tests; + +public class SiteCallAuditOptionsTests +{ + [Fact] + public void Defaults_AreExpectedValues() + { + var options = new SiteCallAuditOptions(); + + // Stuck threshold mirrors NotificationOutboxOptions.StuckAgeThreshold. + Assert.Equal(TimeSpan.FromMinutes(10), options.StuckAgeThreshold); + // KPI interval mirrors NotificationOutboxOptions.DeliveredKpiWindow. 
+ Assert.Equal(TimeSpan.FromMinutes(1), options.KpiInterval); + } +} From ac1f73cf8af220080cd3b40c62add37b1efa8b5f Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:24:16 -0400 Subject: [PATCH 09/23] fix(sitecallaudit): push StuckOnly filter into SQL; doc accuracy fixes --- .../Messages/Audit/SiteCallQueries.cs | 10 +++ .../Types/Audit/SiteCallKpiSnapshot.cs | 4 +- .../Types/Audit/SiteCallQueryFilter.cs | 17 +++++- .../Types/Audit/SiteCallSiteKpiSnapshot.cs | 2 +- .../Repositories/SiteCallAuditRepository.cs | 7 +++ .../SiteCallAuditActor.cs | 28 ++++----- .../SiteCallAuditRepositoryTests.cs | 61 +++++++++++++++++++ .../SiteCallAuditActorTests.cs | 59 ++++++++++++++++++ 8 files changed, 168 insertions(+), 20 deletions(-) diff --git a/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs index d23a467..d5c98a4 100644 --- a/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs +++ b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs @@ -18,6 +18,10 @@ namespace ScadaLink.Commons.Messages.Audit; /// exact-match target filter, consistent with the repository's /// predicate. /// +/// +/// Requested page size. The actor clamps this to the [1, 200] range, so +/// the effective ceiling is 200 rows per page regardless of the value sent. +/// public sealed record SiteCallQueryRequest( string CorrelationId, string? StatusFilter, @@ -39,6 +43,12 @@ public sealed record SiteCallQueryRequest( /// /// none are surfaced here. /// +/// +/// is not called out in the Site Call Audit plan, but +/// it is a real (nullable) +/// column — the last HTTP status code observed for the call — so it is surfaced +/// here for the grid; null for non-HTTP channels or before a first attempt. 
+/// public sealed record SiteCallSummary( Guid TrackedOperationId, string SourceSite, diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs index 07873fb..fa07c8f 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs @@ -8,7 +8,7 @@ namespace ScadaLink.Commons.Types.Audit; /// Notification Outbox tile layout. /// /// -/// Count of non-terminal rows (Pending + Retrying) — calls +/// Count of non-terminal rows (TerminalAtUtc IS NULL) — calls /// buffered at sites awaiting retry. /// /// Count of rows in the Parked status. @@ -25,7 +25,7 @@ namespace ScadaLink.Commons.Types.Audit; /// null when there are no non-terminal rows. /// /// -/// Count of non-terminal rows (Pending/Retrying) whose +/// Count of non-terminal rows (TerminalAtUtc IS NULL) whose /// is older /// than the supplied stuck cutoff. Display-only — no escalation. /// diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs index cf7e7d4..63f0c58 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs @@ -12,10 +12,25 @@ namespace ScadaLink.Commons.Types.Audit; /// underlying columns are bounded ASCII (varchar) and the Central UI Site Calls /// page exposes them as drop-down filters, not free-text search. /// +/// Restrict to a single channel (exact match). +/// Restrict to a single source site (exact match). +/// Restrict to a single status (exact match). +/// Restrict to a single target (exact match). +/// Inclusive lower bound on CreatedAtUtc. +/// Inclusive upper bound on CreatedAtUtc. +/// +/// When set, restrict to stuck rows: TerminalAtUtc IS NULL AND CreatedAtUtc < +/// StuckCutoffUtc. Both columns are plain (no value converter) and compose +/// directly with the keyset cursor. 
Mirrors +/// ; +/// keeps the "StuckOnly" filter honest so paging never returns under-filled +/// pages with a non-null next cursor. +/// public sealed record SiteCallQueryFilter( string? Channel = null, string? SourceSite = null, string? Status = null, string? Target = null, DateTime? FromUtc = null, - DateTime? ToUtc = null); + DateTime? ToUtc = null, + DateTime? StuckCutoffUtc = null); diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs index c67c895..c5c8208 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs @@ -7,7 +7,7 @@ namespace ScadaLink.Commons.Types.Audit; /// . /// /// The site identifier these metrics are scoped to. -/// Count of this site's non-terminal rows (Pending + Retrying). +/// Count of this site's non-terminal rows (TerminalAtUtc IS NULL). /// Count of this site's rows in the Parked status. /// /// Count of this site's Failed rows whose TerminalAtUtc is at or diff --git a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs index 954e490..d90d0d9 100644 --- a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs +++ b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs @@ -164,7 +164,13 @@ WHERE TrackedOperationId = {idText} var fromUtc = filter.FromUtc; var toUtc = filter.ToUtc; + var stuckCutoff = filter.StuckCutoffUtc; + // The stuck predicate (TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff) + // is pushed into SQL here — both columns are plain (no value converter) + // and compose with the keyset cursor, so a StuckOnly page is honest: + // never under-filled with a non-null next cursor. Mirrors how + // NotificationOutboxRepository.QueryAsync applies NotificationOutboxFilter.StuckCutoff. 
FormattableString sql = $@" SELECT TOP ({paging.PageSize}) TrackedOperationId, Channel, Target, SourceSite, Status, RetryCount, @@ -176,6 +182,7 @@ WHERE ({filter.Channel} IS NULL OR Channel = {filter.Channel}) AND ({filter.Target} IS NULL OR Target = {filter.Target}) AND ({fromUtc} IS NULL OR CreatedAtUtc >= {fromUtc}) AND ({toUtc} IS NULL OR CreatedAtUtc <= {toUtc}) + AND ({stuckCutoff} IS NULL OR (TerminalAtUtc IS NULL AND CreatedAtUtc < {stuckCutoff})) AND ({(hasCursor ? 1 : 0)} = 0 OR CreatedAtUtc < {afterCreated} OR (CreatedAtUtc = {afterCreated} AND TrackedOperationId < {afterIdString})) diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs index a6537dd..11af5ca 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs @@ -21,10 +21,9 @@ namespace ScadaLink.SiteCallAudit; /// /// /// -/// M3 ships the minimum surface: ingest only. Reconciliation, KPIs, and -/// central→site Retry/Discard relay are deferred (per CLAUDE.md scope -/// discipline — Site Call Audit's KPIs and the Retry/Discard relay land in a -/// follow-up). +/// Query, detail and KPIs land in Task 4; reconciliation and the central→site +/// Retry/Discard relay remain deferred (per CLAUDE.md scope discipline — they +/// land in a later follow-up). 
/// /// /// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" — @@ -195,13 +194,20 @@ public class SiteCallAuditActor : ReceiveActor private async Task QueryAsync(SiteCallQueryRequest request, DateTime now) { + var stuckCutoff = now - _options.StuckAgeThreshold; + var filter = new SiteCallQueryFilter( Channel: NullIfBlank(request.ChannelFilter), SourceSite: NullIfBlank(request.SourceSiteFilter), Status: NullIfBlank(request.StatusFilter), Target: NullIfBlank(request.TargetKeyword), FromUtc: request.FromUtc, - ToUtc: request.ToUtc); + ToUtc: request.ToUtc, + // StuckOnly is pushed into the repository SQL via StuckCutoffUtc — + // TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff composes with the + // keyset cursor, so the page is always honest (full pages, no empty + // pages with a non-null next cursor). + StuckCutoffUtc: request.StuckOnly ? stuckCutoff : null); var pageSize = Math.Clamp(request.PageSize, 1, MaxPageSize); var paging = new SiteCallPaging( @@ -214,21 +220,11 @@ public class SiteCallAuditActor : ReceiveActor { var rows = await repository.QueryAsync(filter, paging).ConfigureAwait(false); - var stuckCutoff = now - _options.StuckAgeThreshold; var summaries = rows - // StuckOnly is post-filtered here rather than pushed into the - // repository SQL — the SiteCallQueryFilter has no stuck predicate - // and a status-aware created-before clause does not compose with - // the keyset cursor. The page may therefore return fewer than - // PageSize rows when StuckOnly is set; that is acceptable for a - // display-only filter. - .Where(row => !request.StuckOnly || IsStuck(row, stuckCutoff)) .Select(row => ToSummary(row, stuckCutoff)) .ToList(); - // The next-page cursor is the LAST row of the materialised page — - // before StuckOnly post-filtering, so paging still advances even - // when every row on a page was filtered out. + // The next-page cursor is the last row of the materialised page. var cursorRow = rows.Count > 0 ? 
rows[^1] : null; return new SiteCallQueryResponse( diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs index 1156420..67d93f2 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs @@ -271,6 +271,67 @@ public class SiteCallAuditRepositoryTests : IClassFixture Assert.Equal(5, allIds.Count); } + [SkippableFact] + public async Task QueryAsync_StuckCutoff_ComposesWithKeysetPaging_NoEmptyPages() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var site = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + + // Three stuck rows (non-terminal, created before the cutoff) interleaved + // by CreatedAtUtc with non-stuck rows: recent non-terminal rows and an + // old-but-terminal row. The stuck predicate is pushed into the SQL WHERE + // alongside the keyset cursor, so each page must come back full of stuck + // rows — never under-filled by a post-filter. + var t0 = new DateTime(2026, 5, 20, 8, 0, 0, DateTimeKind.Utc); + var cutoff = t0.AddMinutes(10); + + var stuckIds = new List(); + for (var i = 0; i < 3; i++) + { + var stuckId = TrackedOperationId.New(); + stuckIds.Add(stuckId); + // Stuck: non-terminal, created before the cutoff. + await repo.UpsertAsync(NewRow( + stuckId, sourceSite: site, status: "Attempted", + createdAtUtc: t0.AddMinutes(i))); + // Not stuck: non-terminal but created after the cutoff. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), sourceSite: site, status: "Attempted", + createdAtUtc: cutoff.AddMinutes(i + 1))); + // Not stuck: created before the cutoff but terminal. 
+ await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), sourceSite: site, status: "Delivered", + createdAtUtc: t0.AddMinutes(i), terminal: true, + terminalAtUtc: t0.AddMinutes(i + 1))); + } + + var filter = new SiteCallQueryFilter(SourceSite: site, StuckCutoffUtc: cutoff); + + var page1 = await repo.QueryAsync(filter, new SiteCallPaging(PageSize: 2)); + Assert.Equal(2, page1.Count); + Assert.All(page1, r => Assert.Null(r.TerminalAtUtc)); + Assert.All(page1, r => Assert.True(r.CreatedAtUtc < cutoff)); + + var cursor1 = page1[^1]; + var page2 = await repo.QueryAsync( + filter, + new SiteCallPaging( + PageSize: 2, + AfterCreatedAtUtc: cursor1.CreatedAtUtc, + AfterId: cursor1.TrackedOperationId)); + // Only the third stuck row remains — no empty trailing page. + Assert.Single(page2); + Assert.Null(page2[0].TerminalAtUtc); + Assert.True(page2[0].CreatedAtUtc < cutoff); + + // Exactly the three stuck rows, no overlap, no non-stuck leakage. + var returned = page1.Concat(page2).Select(r => r.TrackedOperationId).ToHashSet(); + Assert.Equal(stuckIds.ToHashSet(), returned); + } + [SkippableFact] public async Task PurgeTerminalAsync_RemovesTerminalAndOld() { diff --git a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs index 73f3ead..3c39257 100644 --- a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs +++ b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs @@ -293,6 +293,65 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture(TimeSpan.FromSeconds(10)); + Assert.True(page1.Success); + // Page is full — two stuck rows, both honestly stuck. 
+ Assert.Equal(2, page1.SiteCalls.Count); + Assert.All(page1.SiteCalls, s => Assert.True(s.IsStuck)); + Assert.NotNull(page1.NextAfterCreatedAtUtc); + + actor.Tell( + new SiteCallQueryRequest( + "corr-stuck-p2", null, siteId, null, null, StuckOnly: true, + null, null, page1.NextAfterCreatedAtUtc, page1.NextAfterId, + PageSize: 2), + TestActor); + var page2 = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(page2.Success); + // Final page — the third stuck row, the only remaining match. + Assert.Single(page2.SiteCalls); + Assert.All(page2.SiteCalls, s => Assert.True(s.IsStuck)); + + // No overlap, exactly the three stuck rows across both pages. + var allIds = page1.SiteCalls.Concat(page2.SiteCalls) + .Select(s => s.TrackedOperationId).ToHashSet(); + Assert.Equal(3, allIds.Count); + } + [SkippableFact] public async Task SiteCallDetailRequest_KnownId_ReturnsFullDetail() { From 7816b840c1f45ab31b133c279c99229ec4e5e48f Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:36:04 -0400 Subject: [PATCH 10/23] =?UTF-8?q?feat(sitecallaudit):=20central=E2=86=92si?= =?UTF-8?q?te=20Retry/Discard=20relay=20for=20parked=20operations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Messages/Audit/SiteCallRelayMessages.cs | 113 ++++++++++ .../ParkedOperationRelayMessages.cs | 75 +++++++ .../Actors/SiteCommunicationActor.cs | 27 +++ .../CommunicationService.cs | 27 +++ .../Actors/AkkaHostedService.cs | 12 +- .../ScadaLink.SiteCallAudit.csproj | 5 + .../SiteCallAuditActor.cs | 203 +++++++++++++++++ .../SiteCallAuditOptions.cs | 11 + .../ParkedMessageHandlerActor.cs | 47 ++++ .../CommunicationServiceTests.cs | 58 +++++ .../SiteCommunicationActorTests.cs | 68 ++++++ .../SiteCallRelayTests.cs | 212 ++++++++++++++++++ .../ParkedOperationRelayTests.cs | 168 ++++++++++++++ 13 files changed, 1025 insertions(+), 1 deletion(-) create mode 100644 src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs create mode 
100644 src/ScadaLink.Commons/Messages/RemoteQuery/ParkedOperationRelayMessages.cs create mode 100644 tests/ScadaLink.SiteCallAudit.Tests/SiteCallRelayTests.cs create mode 100644 tests/ScadaLink.StoreAndForward.Tests/ParkedOperationRelayTests.cs diff --git a/src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs b/src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs new file mode 100644 index 0000000..62039e6 --- /dev/null +++ b/src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs @@ -0,0 +1,113 @@ +namespace ScadaLink.Commons.Messages.Audit; + +/// +/// Outcome of a Site Call Audit (#22) Retry/Discard relay — distinguishes the +/// three cases the Central UI Site Calls page must surface differently. +/// +/// +/// The "site unreachable" case is deliberately separate from +/// : central is an eventually-consistent mirror, +/// not the source of truth, so a relay that never reaches the owning site is a +/// transient transport condition the operator can retry — not a failed +/// operation. The UI shows "site unreachable" rather than a generic error. +/// +public enum SiteCallRelayOutcome +{ + /// + /// The owning site received the relay command and applied the action to its + /// Store-and-Forward buffer (the parked cached call was reset to retry, or + /// discarded). The corrected state reaches central later via telemetry. + /// + Applied, + + /// + /// The owning site received the relay command but found nothing to do — no + /// parked row matched the tracked id (already delivered/discarded, or no + /// longer Parked). A definitive answer from the site, not a failure. + /// + NotParked, + + /// + /// The owning site could not be reached (offline / no ClusterClient route / + /// relay timed out). The action was NOT applied; the operator may retry once + /// the site is back online. 
+ /// + SiteUnreachable, + + /// + /// The owning site was reached but reported it could not apply the action + /// (its parked-message handler was unavailable or its store faulted). + /// + OperationFailed, +} + +/// +/// Central UI → Site Call Audit: relay a Retry of a parked cached call to its +/// owning site. The owning site performs the actual retry on its +/// Store-and-Forward buffer — central never mutates the central SiteCalls +/// mirror row. Mirrors +/// +/// but carries (the relay target) and answers with a +/// distinct site-unreachable outcome. +/// +/// Request correlation id, echoed on the response. +/// +/// The cached operation to retry — the PK of the central SiteCalls row +/// and the S&F buffer message id at the owning site. +/// +/// +/// The owning site (SiteCall.SourceSite) the relay is routed to. +/// +public sealed record RetrySiteCallRequest( + string CorrelationId, + Guid TrackedOperationId, + string SourceSite); + +/// +/// Site Call Audit → Central UI: result of a . +/// +/// Echoed request correlation id. +/// +/// The relay outcome — , +/// , +/// or +/// . +/// +/// +/// Convenience flag — true only for . +/// +/// +/// false only for ; lets +/// the UI distinguish "site offline" from "operation failed" without switching +/// on the enum. +/// +/// +/// Human-readable detail for a non-applied outcome; null on success. +/// +public sealed record RetrySiteCallResponse( + string CorrelationId, + SiteCallRelayOutcome Outcome, + bool Success, + bool SiteReachable, + string? ErrorMessage); + +/// +/// Central UI → Site Call Audit: relay a Discard of a parked cached call to its +/// owning site. See for the source-of-truth +/// and routing rationale. +/// +public sealed record DiscardSiteCallRequest( + string CorrelationId, + Guid TrackedOperationId, + string SourceSite); + +/// +/// Site Call Audit → Central UI: result of a . +/// Same shape as . 
+/// +public sealed record DiscardSiteCallResponse( + string CorrelationId, + SiteCallRelayOutcome Outcome, + bool Success, + bool SiteReachable, + string? ErrorMessage); diff --git a/src/ScadaLink.Commons/Messages/RemoteQuery/ParkedOperationRelayMessages.cs b/src/ScadaLink.Commons/Messages/RemoteQuery/ParkedOperationRelayMessages.cs new file mode 100644 index 0000000..0d60789 --- /dev/null +++ b/src/ScadaLink.Commons/Messages/RemoteQuery/ParkedOperationRelayMessages.cs @@ -0,0 +1,75 @@ +using ScadaLink.Commons.Types; + +namespace ScadaLink.Commons.Messages.RemoteQuery; + +/// +/// Central → site relay command: retry a parked cached operation +/// (ExternalSystem.CachedCall / Database.CachedWrite) on the +/// owning site's Store-and-Forward buffer. Sent over the command/control +/// channel by SiteCallAuditActor when an operator clicks Retry on a +/// Parked Site Call row in the Central UI. +/// +/// +/// +/// The site is the source of truth for cached-call status — central never +/// mutates the central SiteCalls mirror row directly. This command asks +/// the site to reset its own parked row back to Pending so the S&F +/// retry sweep attempts delivery again; the corrected state then flows back to +/// central via the normal cached-call telemetry path. +/// +/// +/// The cached call's S&F buffer message id is the +/// itself (the tracked id is supplied as the +/// buffered row's id at enqueue time), so the site can resolve the parked row +/// directly from . A retry on a row that is not +/// actually Parked is a safe no-op at the site — the ack reports +/// Applied=false rather than corrupting a non-parked row. +/// +/// +/// This is a plain record carrying only ids, so it lives in Commons (no +/// IActorRef field). It mirrors +/// but keys on rather than the opaque S&F +/// message-id string. 
+/// +/// +public sealed record RetryParkedOperation( + string CorrelationId, + TrackedOperationId TrackedOperationId); + +/// +/// Central → site relay command: discard a parked cached operation on the +/// owning site's Store-and-Forward buffer. Sent over the command/control +/// channel by SiteCallAuditActor when an operator clicks Discard on a +/// Parked Site Call row in the Central UI. See +/// for the source-of-truth and message-id +/// rationale; Discard marks the operation terminally Discarded at the +/// site by removing the parked S&F buffer row. +/// +public sealed record DiscardParkedOperation( + string CorrelationId, + TrackedOperationId TrackedOperationId); + +/// +/// Site → central ack for a / +/// relay command. The site replies this +/// after applying (or safely no-op-ing) the action against its own +/// Store-and-Forward buffer. +/// +/// Correlation id of the originating relay command. +/// +/// true when the parked operation was found and the action was applied; +/// false when no parked row matched the +/// (already delivered, discarded, never cached, or not in a Parked +/// state). A false ack is a definitive "nothing to do" answer from the +/// site — it is NOT a transport failure, so the relay must distinguish it from +/// a site-unreachable timeout. +/// +/// +/// Populated only when the site could not apply the action (e.g. the parked +/// message handler is not available, or the S&F store faulted); null +/// on a clean Applied=true/Applied=false outcome. +/// +public sealed record ParkedOperationActionAck( + string CorrelationId, + bool Applied, + string? 
ErrorMessage = null); diff --git a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs index 1934ac8..42d7635 100644 --- a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs +++ b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs @@ -167,6 +167,33 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers } }); + // Task 5 (#22): central→site Retry/Discard relay for parked cached + // operations. SiteCallAuditActor relays these over the command/control + // channel; the parked-message handler executes them against the local + // S&F buffer and replies a ParkedOperationActionAck that routes back to + // the relaying SiteCallAuditActor's Ask. + Receive(msg => + { + if (_parkedMessageHandler != null) + _parkedMessageHandler.Forward(msg); + else + { + Sender.Tell(new ParkedOperationActionAck( + msg.CorrelationId, Applied: false, "Parked message handler not available")); + } + }); + + Receive(msg => + { + if (_parkedMessageHandler != null) + _parkedMessageHandler.Forward(msg); + else + { + Sender.Tell(new ParkedOperationActionAck( + msg.CorrelationId, Applied: false, "Parked message handler not available")); + } + }); + // Notification Outbox: forward a buffered notification submitted by the site // Store-and-Forward Engine to the central cluster. The original Sender (the // S&F forwarder's Ask) is forwarded as the ClusterClient.Send sender so the diff --git a/src/ScadaLink.Communication/CommunicationService.cs b/src/ScadaLink.Communication/CommunicationService.cs index c83901b..e7cadf9 100644 --- a/src/ScadaLink.Communication/CommunicationService.cs +++ b/src/ScadaLink.Communication/CommunicationService.cs @@ -347,6 +347,33 @@ public class CommunicationService return await GetSiteCallAudit().Ask( request, _options.QueryTimeout, cancellationToken); } + + /// + /// Task 5 (#22): relays an operator Retry of a parked cached call to its + /// owning site. 
The SiteCallAuditActor is Asked directly (it is + /// central-local); it in turn relays a RetryParkedOperation to the + /// owning site and replies a carrying a + /// distinct site-unreachable outcome. Central never mutates the central + /// SiteCalls mirror row. + /// + public async Task RetrySiteCallAsync( + RetrySiteCallRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } + + /// + /// Task 5 (#22): relays an operator Discard of a parked cached call to its + /// owning site. See for the routing and + /// source-of-truth rationale. + /// + public async Task DiscardSiteCallAsync( + DiscardSiteCallRequest request, CancellationToken cancellationToken = default) + { + return await GetSiteCallAudit().Ask( + request, _options.QueryTimeout, cancellationToken); + } } /// diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 4708744..90dda17 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -446,7 +446,17 @@ akka {{ // the Site Call Audit actor directly (query, KPIs, detail) — mirrors the // SetNotificationOutbox wiring above. commService?.SetSiteCallAudit(siteCallAuditProxy); - _logger.LogInformation("SiteCallAuditActor singleton created"); + + // Task 5 (#22): hand the CentralCommunicationActor to the SiteCallAudit + // actor so it can relay operator Retry/Discard on parked cached calls to + // the owning site (over the per-site ClusterClient via SiteEnvelope). + // Mirrors the RegisterAuditIngest / RegisterNotificationOutbox wiring; + // the message is sent to the singleton proxy so it reaches whichever + // central node currently hosts the singleton. 
+ siteCallAuditProxy.Tell( + new ScadaLink.SiteCallAudit.RegisterCentralCommunication(centralCommActor)); + _logger.LogInformation( + "SiteCallAuditActor singleton created and registered with CentralCommunicationActor"); _logger.LogInformation("Central actors registered. CentralCommunicationActor created."); } diff --git a/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj b/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj index 7603dd6..0a46f34 100644 --- a/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj +++ b/src/ScadaLink.SiteCallAudit/ScadaLink.SiteCallAudit.csproj @@ -24,6 +24,11 @@ project reference is documented here so the actor's scope-per-message GetRequiredService() compiles. --> + + diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs index 11af5ca..e338b05 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs @@ -4,8 +4,10 @@ using Microsoft.Extensions.Logging; using ScadaLink.Commons.Entities.Audit; using ScadaLink.Commons.Interfaces.Repositories; using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Messages.RemoteQuery; using ScadaLink.Commons.Types; using ScadaLink.Commons.Types.Audit; +using ScadaLink.Communication; namespace ScadaLink.SiteCallAudit; @@ -52,6 +54,19 @@ public class SiteCallAuditActor : ReceiveActor private readonly SiteCallAuditOptions _options; private readonly ILogger _logger; + /// + /// Task 5 (#22): the central→site command transport — the + /// CentralCommunicationActor, which owns the per-site + /// ClusterClient map and routes a to the + /// owning site. Set via by the + /// Host after both actors exist (this actor is a cluster singleton; the + /// transport actor is created separately). Null until registration + /// completes — a relay arriving before then is answered with a + /// outcome, because there + /// is genuinely no route to any site yet. 
+ /// + private IActorRef? _centralCommunication; + /// /// Test-mode constructor — injects a concrete repository instance whose /// lifetime exceeds the test, so the actor reuses the same instance @@ -110,6 +125,15 @@ public class SiteCallAuditActor : ReceiveActor Receive(HandleDetail); Receive(HandleKpi); Receive(HandlePerSiteKpi); + + // Task 5 (#22): central→site Retry/Discard relay for parked cached calls. + Receive(msg => + { + _centralCommunication = msg.CentralCommunication; + _logger.LogInformation("SiteCallAudit registered central→site communication transport"); + }); + Receive(HandleRetrySiteCall); + Receive(HandleDiscardSiteCall); } /// @@ -385,6 +409,175 @@ public class SiteCallAuditActor : ReceiveActor } } + // ── Task 5: central→site Retry/Discard relay ── + + /// + /// Relays an operator Retry of a parked cached call to its owning site. The + /// site is the source of truth — this handler NEVER writes the central + /// SiteCalls mirror row. It wraps a + /// in a addressed to SourceSite, Asks the + /// CentralCommunicationActor (which routes it over the per-site + /// ClusterClient), and maps the site's + /// — or an Ask timeout — onto a + /// . A timeout / no-route is reported as + /// the distinct outcome, + /// not a generic failure, so the Central UI can tell "site offline" from + /// "operation failed". + /// + private void HandleRetrySiteCall(RetrySiteCallRequest request) + { + var sender = Sender; + + if (_centralCommunication is null) + { + // No transport registered yet — there is genuinely no route to any + // site, so the only honest answer is unreachable. 
+ _logger.LogWarning( + "RetrySiteCall {TrackedOperationId} for site {SourceSite} arrived before the " + + "central→site transport was registered; reporting site unreachable", + request.TrackedOperationId, request.SourceSite); + sender.Tell(UnreachableRetry(request.CorrelationId)); + return; + } + + var relay = new RetryParkedOperation( + request.CorrelationId, new TrackedOperationId(request.TrackedOperationId)); + var envelope = new SiteEnvelope(request.SourceSite, relay); + + _centralCommunication.Ask(envelope, _options.RelayTimeout) + .PipeTo( + sender, + success: ack => MapRetryResponse(request.CorrelationId, ack), + failure: ex => MapRetryFailure(request.CorrelationId, request.SourceSite, ex)); + } + + /// + /// Relays an operator Discard of a parked cached call to its owning site. + /// Mirrors — see that method for the + /// source-of-truth and site-unreachable rationale. + /// + private void HandleDiscardSiteCall(DiscardSiteCallRequest request) + { + var sender = Sender; + + if (_centralCommunication is null) + { + _logger.LogWarning( + "DiscardSiteCall {TrackedOperationId} for site {SourceSite} arrived before the " + + "central→site transport was registered; reporting site unreachable", + request.TrackedOperationId, request.SourceSite); + sender.Tell(UnreachableDiscard(request.CorrelationId)); + return; + } + + var relay = new DiscardParkedOperation( + request.CorrelationId, new TrackedOperationId(request.TrackedOperationId)); + var envelope = new SiteEnvelope(request.SourceSite, relay); + + _centralCommunication.Ask(envelope, _options.RelayTimeout) + .PipeTo( + sender, + success: ack => MapDiscardResponse(request.CorrelationId, ack), + failure: ex => MapDiscardFailure(request.CorrelationId, request.SourceSite, ex)); + } + + /// + /// Maps the site's for a Retry onto a + /// : an applied action is + /// ; a clean no-op + /// (Applied=false, no error) is ; + /// an ack carrying an error is + /// — in every case the site WAS reached. 
+ /// + private static RetrySiteCallResponse MapRetryResponse(string correlationId, ParkedOperationActionAck ack) + { + var outcome = ClassifyAck(ack); + return new RetrySiteCallResponse( + correlationId, + outcome, + Success: outcome == SiteCallRelayOutcome.Applied, + SiteReachable: true, + ErrorMessage: AckErrorMessage(outcome, ack)); + } + + private static DiscardSiteCallResponse MapDiscardResponse(string correlationId, ParkedOperationActionAck ack) + { + var outcome = ClassifyAck(ack); + return new DiscardSiteCallResponse( + correlationId, + outcome, + Success: outcome == SiteCallRelayOutcome.Applied, + SiteReachable: true, + ErrorMessage: AckErrorMessage(outcome, ack)); + } + + private RetrySiteCallResponse MapRetryFailure(string correlationId, string sourceSite, Exception ex) + { + _logger.LogWarning(ex, + "Retry relay to site {SourceSite} did not complete; reporting site unreachable", sourceSite); + return UnreachableRetry(correlationId); + } + + private DiscardSiteCallResponse MapDiscardFailure(string correlationId, string sourceSite, Exception ex) + { + _logger.LogWarning(ex, + "Discard relay to site {SourceSite} did not complete; reporting site unreachable", sourceSite); + return UnreachableDiscard(correlationId); + } + + /// + /// Classifies a site ack: Applied=true → applied; Applied=false + /// with no error → the site definitively had nothing parked; Applied=false + /// with an error → the site could not apply the action. + /// + private static SiteCallRelayOutcome ClassifyAck(ParkedOperationActionAck ack) + { + if (ack.Applied) + { + return SiteCallRelayOutcome.Applied; + } + + return ack.ErrorMessage is null + ? SiteCallRelayOutcome.NotParked + : SiteCallRelayOutcome.OperationFailed; + } + + private static string? 
AckErrorMessage(SiteCallRelayOutcome outcome, ParkedOperationActionAck ack) + { + return outcome switch + { + SiteCallRelayOutcome.Applied => null, + SiteCallRelayOutcome.NotParked => + "The operation is no longer parked at the site (already delivered, discarded, or retrying).", + SiteCallRelayOutcome.OperationFailed => ack.ErrorMessage, + _ => ack.ErrorMessage, + }; + } + + /// Shared "site unreachable" detail text for both relay directions. + private const string SiteUnreachableMessage = + "The owning site is unreachable; the action was not applied. Retry when the site is back online."; + + private static RetrySiteCallResponse UnreachableRetry(string correlationId) + { + return new RetrySiteCallResponse( + correlationId, + SiteCallRelayOutcome.SiteUnreachable, + Success: false, + SiteReachable: false, + ErrorMessage: SiteUnreachableMessage); + } + + private static DiscardSiteCallResponse UnreachableDiscard(string correlationId) + { + return new DiscardSiteCallResponse( + correlationId, + SiteCallRelayOutcome.SiteUnreachable, + Success: false, + SiteReachable: false, + ErrorMessage: SiteUnreachableMessage); + } + /// /// Resolves an for one read message. /// In test mode the injected instance is returned with a null scope; in @@ -464,3 +657,13 @@ public class SiteCallAuditActor : ReceiveActor return string.IsNullOrWhiteSpace(value) ? null : value; } } + +/// +/// Registers the central→site command transport (the CentralCommunicationActor) +/// with the so it can relay Retry/Discard +/// actions on parked cached calls to their owning sites. Sent by the Host after +/// both actors exist. Lives here (not in Commons) because it carries an +/// and ScadaLink.Commons has no Akka reference — +/// the same rationale as RegisterAuditIngest. 
+/// +public sealed record RegisterCentralCommunication(IActorRef CentralCommunication); diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs index 572fec6..53fe6b9 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs @@ -23,4 +23,15 @@ public class SiteCallAuditOptions /// NotificationOutboxOptions.DeliveredKpiWindow. /// public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1); + + /// + /// Task 5 (#22): Ask timeout for the central→site Retry/Discard relay. When + /// the owning site does not ack a RetryParkedOperation / + /// DiscardParkedOperation within this window — site offline, no + /// ClusterClient route, or central buffering deliberately absent — the relay + /// reports a SiteUnreachable outcome. Default 10 seconds: long enough + /// to absorb a healthy cross-cluster round-trip, short enough that an + /// operator clicking Retry on an offline site gets a fast, honest answer. + /// + public TimeSpan RelayTimeout { get; set; } = TimeSpan.FromSeconds(10); } diff --git a/src/ScadaLink.StoreAndForward/ParkedMessageHandlerActor.cs b/src/ScadaLink.StoreAndForward/ParkedMessageHandlerActor.cs index 0f922cd..8d2b8ed 100644 --- a/src/ScadaLink.StoreAndForward/ParkedMessageHandlerActor.cs +++ b/src/ScadaLink.StoreAndForward/ParkedMessageHandlerActor.cs @@ -24,6 +24,13 @@ public class ParkedMessageHandlerActor : ReceiveActor Receive(HandleQuery); Receive(HandleRetry); Receive(HandleDiscard); + + // Task 5 (#22): central→site Retry/Discard relay for parked cached + // operations. The cached call's S&F buffer message id is the + // TrackedOperationId, so these reuse the same parked-message primitive + // as HandleRetry/HandleDiscard, keyed off the tracked id. 
+ Receive(HandleRetryParkedOperation); + Receive(HandleDiscardParkedOperation); } private void HandleQuery(ParkedMessageQueryRequest msg) @@ -90,6 +97,46 @@ public class ParkedMessageHandlerActor : ReceiveActor msg.CorrelationId, false, ex.GetBaseException().Message)); } + /// + /// Task 5 (#22): executes a central-relayed Retry of a parked cached call. + /// The tracked id is the S&F buffer message id, so this reuses + /// — which only + /// touches rows that are actually Parked (a non-parked or unknown + /// operation yields false, a safe no-op). Central never mutates the + /// central SiteCalls mirror; the reset row's corrected state flows + /// back via the normal cached-call telemetry path. + /// + private void HandleRetryParkedOperation(RetryParkedOperation msg) + { + var sender = Sender; + + _service.RetryParkedMessageAsync(msg.TrackedOperationId.ToString()) + .PipeTo( + sender, + success: applied => new ParkedOperationActionAck( + msg.CorrelationId, applied, ErrorMessage: null), + failure: ex => new ParkedOperationActionAck( + msg.CorrelationId, Applied: false, ex.GetBaseException().Message)); + } + + /// + /// Task 5 (#22): executes a central-relayed Discard of a parked cached call. + /// Mirrors ; Discard removes the + /// parked S&F buffer row (only when it is actually Parked). 
+ /// + private void HandleDiscardParkedOperation(DiscardParkedOperation msg) + { + var sender = Sender; + + _service.DiscardParkedMessageAsync(msg.TrackedOperationId.ToString()) + .PipeTo( + sender, + success: applied => new ParkedOperationActionAck( + msg.CorrelationId, applied, ErrorMessage: null), + failure: ex => new ParkedOperationActionAck( + msg.CorrelationId, Applied: false, ex.GetBaseException().Message)); + } + private static string ExtractMethodName(string payloadJson, Commons.Types.Enums.StoreAndForwardCategory category) { if (string.IsNullOrEmpty(payloadJson)) diff --git a/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs b/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs index b1a1de8..df09395 100644 --- a/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs +++ b/tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs @@ -382,6 +382,64 @@ public class CommunicationServiceTests : TestKit Assert.Equal("plant-a", result.Sites[0].SourceSite); } + [Fact] + public async Task RetrySiteCallAsync_BeforeSiteCallAuditSet_Throws() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + + await Assert.ThrowsAsync(() => + service.RetrySiteCallAsync(new RetrySiteCallRequest("corr-1", Guid.NewGuid(), "plant-a"))); + } + + [Fact] + public async Task RetrySiteCallAsync_AsksSiteCallAuditProxyDirectly() + { + // The relay is initiated by Asking the central-local Site Call Audit + // proxy directly (no SiteEnvelope wrapping at this layer — the actor + // does the site routing itself). 
+ var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new RetrySiteCallRequest("corr-r", Guid.NewGuid(), "plant-a"); + var task = service.RetrySiteCallAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new RetrySiteCallResponse( + "corr-r", SiteCallRelayOutcome.Applied, true, true, null); + probe.Reply(reply); + + Assert.Same(reply, await task); + } + + [Fact] + public async Task DiscardSiteCallAsync_AsksSiteCallAuditProxyDirectly() + { + var service = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + var probe = CreateTestProbe(); + service.SetSiteCallAudit(probe.Ref); + + var request = new DiscardSiteCallRequest("corr-d", Guid.NewGuid(), "plant-a"); + var task = service.DiscardSiteCallAsync(request); + + var received = probe.ExpectMsg(); + Assert.Same(request, received); + var reply = new DiscardSiteCallResponse( + "corr-d", SiteCallRelayOutcome.SiteUnreachable, false, false, "unreachable"); + probe.Reply(reply); + + var result = await task; + Assert.Same(reply, result); + Assert.False(result.SiteReachable); + } + /// /// Stand-in for CentralCommunicationActor: verifies the message is wrapped /// in a SiteEnvelope targeting the requested site and replies with a typed diff --git a/tests/ScadaLink.Communication.Tests/SiteCommunicationActorTests.cs b/tests/ScadaLink.Communication.Tests/SiteCommunicationActorTests.cs index 5a39f2f..4bc880b 100644 --- a/tests/ScadaLink.Communication.Tests/SiteCommunicationActorTests.cs +++ b/tests/ScadaLink.Communication.Tests/SiteCommunicationActorTests.cs @@ -214,4 +214,72 @@ public class SiteCommunicationActorTests : TestKit ExpectMsg(msg => !msg.Success); } + + // ── Task 5 (#22): central→site Retry/Discard relay for parked cached calls ── + + [Fact] + public void 
RetryParkedOperation_WithHandler_ForwardedToParkedMessageHandler() + { + var dmProbe = CreateTestProbe(); + var handlerProbe = CreateTestProbe(); + var siteActor = Sys.ActorOf(Props.Create(() => + new SiteCommunicationActor("site1", _options, dmProbe.Ref))); + + siteActor.Tell(new RegisterLocalHandler(LocalHandlerType.ParkedMessages, handlerProbe.Ref)); + + var id = Commons.Types.TrackedOperationId.New(); + siteActor.Tell(new RetryParkedOperation("corr-rp", id)); + + handlerProbe.ExpectMsg(msg => + msg.CorrelationId == "corr-rp" && msg.TrackedOperationId.Equals(id)); + } + + [Fact] + public void DiscardParkedOperation_WithHandler_ForwardedToParkedMessageHandler() + { + var dmProbe = CreateTestProbe(); + var handlerProbe = CreateTestProbe(); + var siteActor = Sys.ActorOf(Props.Create(() => + new SiteCommunicationActor("site1", _options, dmProbe.Ref))); + + siteActor.Tell(new RegisterLocalHandler(LocalHandlerType.ParkedMessages, handlerProbe.Ref)); + + var id = Commons.Types.TrackedOperationId.New(); + siteActor.Tell(new DiscardParkedOperation("corr-dp", id)); + + handlerProbe.ExpectMsg(msg => + msg.CorrelationId == "corr-dp" && msg.TrackedOperationId.Equals(id)); + } + + [Fact] + public void RetryParkedOperation_WithoutHandler_RepliesNotAppliedAck() + { + // No parked-message handler registered — the relay must get a definitive + // non-applied ack, not silence (the SiteCallAuditActor's Ask must not + // hang and then mis-report site-unreachable when the site IS reachable). 
+ var dmProbe = CreateTestProbe(); + var siteActor = Sys.ActorOf(Props.Create(() => + new SiteCommunicationActor("site1", _options, dmProbe.Ref))); + + siteActor.Tell(new RetryParkedOperation("corr-no-handler", Commons.Types.TrackedOperationId.New())); + + var ack = ExpectMsg(); + Assert.Equal("corr-no-handler", ack.CorrelationId); + Assert.False(ack.Applied); + Assert.NotNull(ack.ErrorMessage); + } + + [Fact] + public void DiscardParkedOperation_WithoutHandler_RepliesNotAppliedAck() + { + var dmProbe = CreateTestProbe(); + var siteActor = Sys.ActorOf(Props.Create(() => + new SiteCommunicationActor("site1", _options, dmProbe.Ref))); + + siteActor.Tell(new DiscardParkedOperation("corr-no-handler", Commons.Types.TrackedOperationId.New())); + + var ack = ExpectMsg(); + Assert.False(ack.Applied); + Assert.NotNull(ack.ErrorMessage); + } } diff --git a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallRelayTests.cs b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallRelayTests.cs new file mode 100644 index 0000000..6cff69d --- /dev/null +++ b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallRelayTests.cs @@ -0,0 +1,212 @@ +using Akka.Actor; +using Akka.TestKit.Xunit2; +using Microsoft.Extensions.Logging.Abstractions; +using ScadaLink.Commons.Entities.Audit; +using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Commons.Messages.RemoteQuery; +using ScadaLink.Commons.Types; +using ScadaLink.Commons.Types.Audit; +using ScadaLink.Communication; + +namespace ScadaLink.SiteCallAudit.Tests; + +/// +/// Task 5 (#22 Retry/Discard relay): tests for +/// relaying operator Retry/Discard on a parked Site Call down to the owning +/// site. The relay routes a / +/// command via a +/// to the +/// (stood in by a TestProbe here) and awaits the site's +/// . These tests never touch the +/// SiteCalls repository — central never mutates the mirror row. 
+/// +public class SiteCallRelayTests : TestKit +{ + /// + /// A repository that fails every call — the relay path must NEVER touch the + /// SiteCalls table (central is not the source of truth), so any + /// invocation here is a test failure surfaced as an exception. + /// + private sealed class ThrowingRepository : ISiteCallAuditRepository + { + public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not write the SiteCalls row"); + + public Task GetAsync(TrackedOperationId id, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not read the SiteCalls row"); + + public Task> QueryAsync( + SiteCallQueryFilter filter, SiteCallPaging paging, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not query the SiteCalls table"); + + public Task PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not purge"); + + public Task ComputeKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not compute KPIs"); + + public Task> ComputePerSiteKpisAsync( + DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default) => + throw new InvalidOperationException("relay must not compute per-site KPIs"); + } + + /// + /// Builds a with a throwing repository and a + /// short relay timeout, and registers as the + /// central→site transport. 
+ /// + private IActorRef CreateActor(IActorRef centralComm) + { + var options = new SiteCallAuditOptions { RelayTimeout = TimeSpan.FromMilliseconds(500) }; + var actor = Sys.ActorOf(Props.Create(() => new SiteCallAuditActor( + new ThrowingRepository(), + NullLogger.Instance, + options))); + actor.Tell(new RegisterCentralCommunication(centralComm)); + return actor; + } + + [Fact] + public void RetrySiteCall_RoutesRetryParkedOperation_ToOwningSite() + { + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + var id = Guid.NewGuid(); + actor.Tell(new RetrySiteCallRequest("corr-1", id, "site-north")); + + // The relay must wrap a RetryParkedOperation in a SiteEnvelope addressed + // to the owning site. + var envelope = central.ExpectMsg(); + Assert.Equal("site-north", envelope.SiteId); + var relay = Assert.IsType(envelope.Message); + Assert.Equal(id, relay.TrackedOperationId.Value); + + // The site applies it and acks; the relay reports Applied. + central.Reply(new ParkedOperationActionAck(relay.CorrelationId, Applied: true)); + + var response = ExpectMsg(); + Assert.Equal("corr-1", response.CorrelationId); + Assert.Equal(SiteCallRelayOutcome.Applied, response.Outcome); + Assert.True(response.Success); + Assert.True(response.SiteReachable); + Assert.Null(response.ErrorMessage); + } + + [Fact] + public void DiscardSiteCall_RoutesDiscardParkedOperation_ToOwningSite() + { + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + var id = Guid.NewGuid(); + actor.Tell(new DiscardSiteCallRequest("corr-2", id, "site-south")); + + var envelope = central.ExpectMsg(); + Assert.Equal("site-south", envelope.SiteId); + var relay = Assert.IsType(envelope.Message); + Assert.Equal(id, relay.TrackedOperationId.Value); + + central.Reply(new ParkedOperationActionAck(relay.CorrelationId, Applied: true)); + + var response = ExpectMsg(); + Assert.Equal(SiteCallRelayOutcome.Applied, response.Outcome); + Assert.True(response.Success); + } + + 
[Fact] + public void RetrySiteCall_SiteRepliesNotApplied_ReportsNotParked() + { + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + actor.Tell(new RetrySiteCallRequest("corr-3", Guid.NewGuid(), "site-north")); + + var envelope = central.ExpectMsg(); + var relay = (RetryParkedOperation)envelope.Message; + // The site found nothing parked — a definitive answer, not a failure. + central.Reply(new ParkedOperationActionAck(relay.CorrelationId, Applied: false)); + + var response = ExpectMsg(); + Assert.Equal(SiteCallRelayOutcome.NotParked, response.Outcome); + Assert.False(response.Success); + Assert.True(response.SiteReachable); + } + + [Fact] + public void RetrySiteCall_SiteRepliesError_ReportsOperationFailed() + { + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + actor.Tell(new RetrySiteCallRequest("corr-4", Guid.NewGuid(), "site-north")); + + var envelope = central.ExpectMsg(); + var relay = (RetryParkedOperation)envelope.Message; + central.Reply(new ParkedOperationActionAck( + relay.CorrelationId, Applied: false, "Parked message handler not available")); + + var response = ExpectMsg(); + Assert.Equal(SiteCallRelayOutcome.OperationFailed, response.Outcome); + Assert.False(response.Success); + // The site WAS reached — this is an operation failure, not unreachable. + Assert.True(response.SiteReachable); + Assert.NotNull(response.ErrorMessage); + } + + [Fact] + public void RetrySiteCall_SiteNeverReplies_ReportsSiteUnreachable() + { + // A central comm probe that silently drops the relay — models an offline + // site / no ClusterClient route: the Ask times out. + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + actor.Tell(new RetrySiteCallRequest("corr-5", Guid.NewGuid(), "site-offline")); + + central.ExpectMsg(); + // Probe does not reply — the relay Ask times out (RelayTimeout = 500ms). 
+ + var response = ExpectMsg(TimeSpan.FromSeconds(3)); + Assert.Equal(SiteCallRelayOutcome.SiteUnreachable, response.Outcome); + Assert.False(response.Success); + // The distinct unreachable signal the UI relies on. + Assert.False(response.SiteReachable); + Assert.NotNull(response.ErrorMessage); + } + + [Fact] + public void DiscardSiteCall_SiteNeverReplies_ReportsSiteUnreachable() + { + var central = CreateTestProbe(); + var actor = CreateActor(central.Ref); + + actor.Tell(new DiscardSiteCallRequest("corr-6", Guid.NewGuid(), "site-offline")); + + central.ExpectMsg(); + + var response = ExpectMsg(TimeSpan.FromSeconds(3)); + Assert.Equal(SiteCallRelayOutcome.SiteUnreachable, response.Outcome); + Assert.False(response.SiteReachable); + } + + [Fact] + public void RetrySiteCall_BeforeCentralCommunicationRegistered_ReportsSiteUnreachable() + { + // No RegisterCentralCommunication — the actor has no transport to reach + // any site, so the only honest answer is "unreachable". + var options = new SiteCallAuditOptions { RelayTimeout = TimeSpan.FromMilliseconds(500) }; + var actor = Sys.ActorOf(Props.Create(() => new SiteCallAuditActor( + new ThrowingRepository(), + NullLogger.Instance, + options))); + + actor.Tell(new RetrySiteCallRequest("corr-7", Guid.NewGuid(), "site-north")); + + var response = ExpectMsg(); + Assert.Equal(SiteCallRelayOutcome.SiteUnreachable, response.Outcome); + Assert.False(response.SiteReachable); + } +} diff --git a/tests/ScadaLink.StoreAndForward.Tests/ParkedOperationRelayTests.cs b/tests/ScadaLink.StoreAndForward.Tests/ParkedOperationRelayTests.cs new file mode 100644 index 0000000..7d4ef6e --- /dev/null +++ b/tests/ScadaLink.StoreAndForward.Tests/ParkedOperationRelayTests.cs @@ -0,0 +1,168 @@ +using Akka.Actor; +using Akka.TestKit.Xunit2; +using Microsoft.Data.Sqlite; +using Microsoft.Extensions.Logging.Abstractions; +using ScadaLink.Commons.Messages.RemoteQuery; +using ScadaLink.Commons.Types; +using ScadaLink.Commons.Types.Enums; + +namespace 
ScadaLink.StoreAndForward.Tests; + +/// +/// Task 5 (#22 Retry/Discard relay): tests the site-side execution of a +/// central→site / +/// relay command on the . The cached +/// call's S&F buffer message id is the , so +/// the handler resolves the parked row directly from the tracked id and reuses +/// the existing parked-message Retry/Discard primitive. A non-parked operation +/// must be a safe no-op (Applied=false), never a corruption. +/// +public class ParkedOperationRelayTests : TestKit, IAsyncLifetime, IDisposable +{ + private readonly SqliteConnection _keepAlive; + private readonly StoreAndForwardStorage _storage; + private readonly StoreAndForwardService _service; + + public ParkedOperationRelayTests() + { + var connStr = $"Data Source=RelayTests_{Guid.NewGuid():N};Mode=Memory;Cache=Shared"; + _keepAlive = new SqliteConnection(connStr); + _keepAlive.Open(); + + _storage = new StoreAndForwardStorage(connStr, NullLogger.Instance); + + var options = new StoreAndForwardOptions + { + DefaultRetryInterval = TimeSpan.Zero, + DefaultMaxRetries = 1, + RetryTimerInterval = TimeSpan.FromMinutes(10), + ReplicationEnabled = false, + }; + + _service = new StoreAndForwardService( + _storage, options, NullLogger.Instance); + } + + public async Task InitializeAsync() => await _storage.InitializeAsync(); + + public Task DisposeAsync() => Task.CompletedTask; + + protected override void Dispose(bool disposing) + { + if (disposing) _keepAlive.Dispose(); + base.Dispose(disposing); + } + + /// + /// Enqueues a cached-call message whose S&F id is the supplied + /// and parks it via the retry sweep. 
+ /// + private async Task ParkCachedCallAsync(TrackedOperationId id) + { + _service.RegisterDeliveryHandler( + StoreAndForwardCategory.ExternalSystem, _ => throw new HttpRequestException("always fails")); + await _service.EnqueueAsync( + StoreAndForwardCategory.ExternalSystem, "ERP.GetOrder", """{}""", + maxRetries: 1, messageId: id.ToString()); + await _service.RetryPendingMessagesAsync(); + } + + [Fact] + public async Task RetryParkedOperation_ParkedCachedCall_ResetsToPendingAndApplied() + { + var id = TrackedOperationId.New(); + await ParkCachedCallAsync(id); + + var actor = Sys.ActorOf(Props.Create(() => new ParkedMessageHandlerActor(_service, "site-1"))); + actor.Tell(new RetryParkedOperation("corr-1", id)); + + var ack = ExpectMsg(); + Assert.True(ack.Applied); + Assert.Equal("corr-1", ack.CorrelationId); + Assert.Null(ack.ErrorMessage); + + // The parked row was reset back to Pending so the retry sweep picks it up. + var msg = await _storage.GetMessageByIdAsync(id.ToString()); + Assert.NotNull(msg); + Assert.Equal(StoreAndForwardMessageStatus.Pending, msg!.Status); + } + + [Fact] + public async Task DiscardParkedOperation_ParkedCachedCall_RemovesRowAndApplied() + { + var id = TrackedOperationId.New(); + await ParkCachedCallAsync(id); + + var actor = Sys.ActorOf(Props.Create(() => new ParkedMessageHandlerActor(_service, "site-1"))); + actor.Tell(new DiscardParkedOperation("corr-2", id)); + + var ack = ExpectMsg(); + Assert.True(ack.Applied); + Assert.Equal("corr-2", ack.CorrelationId); + + var msg = await _storage.GetMessageByIdAsync(id.ToString()); + Assert.Null(msg); + } + + [Fact] + public void RetryParkedOperation_UnknownOperation_IsSafeNoOp() + { + var actor = Sys.ActorOf(Props.Create(() => new ParkedMessageHandlerActor(_service, "site-1"))); + actor.Tell(new RetryParkedOperation("corr-3", TrackedOperationId.New())); + + var ack = ExpectMsg(); + // No parked row matched — definitive "nothing to do", not an error. 
+ Assert.False(ack.Applied); + Assert.Equal("corr-3", ack.CorrelationId); + Assert.Null(ack.ErrorMessage); + } + + [Fact] + public async Task RetryParkedOperation_NonParkedOperation_IsSafeNoOpAndDoesNotCorrupt() + { + // Enqueue a cached call but DO NOT park it — it stays Pending. + var id = TrackedOperationId.New(); + _service.RegisterDeliveryHandler( + StoreAndForwardCategory.ExternalSystem, _ => throw new HttpRequestException("fails")); + await _service.EnqueueAsync( + StoreAndForwardCategory.ExternalSystem, "ERP.GetOrder", """{}""", + maxRetries: 5, messageId: id.ToString()); + + var before = await _storage.GetMessageByIdAsync(id.ToString()); + Assert.Equal(StoreAndForwardMessageStatus.Pending, before!.Status); + + var actor = Sys.ActorOf(Props.Create(() => new ParkedMessageHandlerActor(_service, "site-1"))); + actor.Tell(new RetryParkedOperation("corr-4", id)); + + var ack = ExpectMsg(); + // The row is Pending, not Parked — Retry must be a no-op, not a mutation. + Assert.False(ack.Applied); + + var after = await _storage.GetMessageByIdAsync(id.ToString()); + Assert.NotNull(after); + Assert.Equal(StoreAndForwardMessageStatus.Pending, after!.Status); + // retry_count untouched — a Parked-only Retry must not reset a live row. + Assert.Equal(before.RetryCount, after.RetryCount); + } + + [Fact] + public async Task DiscardParkedOperation_NonParkedOperation_IsSafeNoOp() + { + var id = TrackedOperationId.New(); + _service.RegisterDeliveryHandler( + StoreAndForwardCategory.ExternalSystem, _ => throw new HttpRequestException("fails")); + await _service.EnqueueAsync( + StoreAndForwardCategory.ExternalSystem, "ERP.GetOrder", """{}""", + maxRetries: 5, messageId: id.ToString()); + + var actor = Sys.ActorOf(Props.Create(() => new ParkedMessageHandlerActor(_service, "site-1"))); + actor.Tell(new DiscardParkedOperation("corr-5", id)); + + var ack = ExpectMsg(); + Assert.False(ack.Applied); + + // The Pending row must NOT have been deleted by a Parked-only Discard. 
+ var after = await _storage.GetMessageByIdAsync(id.ToString()); + Assert.NotNull(after); + } +} From 3cf2b4d47e9f35d348a1ee6ae56d807726c30edc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:43:48 -0400 Subject: [PATCH 11/23] fix(sitecallaudit): correct stale relay docs and clarify ack switch --- src/ScadaLink.Host/Actors/AkkaHostedService.cs | 8 +++++--- src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs | 14 ++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 90dda17..85934b4 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -415,9 +415,11 @@ akka {{ // and NotificationOutbox patterns. M3's dual-write transaction routes // SiteCalls upserts through AuditLogIngestActor's own scope-per-message // ISiteCallAuditRepository resolution, so this singleton is not on the - // M3 happy-path hot path; it exists so future direct-write callers - // (reconciliation puller, central→site Retry/Discard relay, KPI - // projector) Ask through a stable cluster proxy without further wiring. + // M3 happy-path hot path; it exists so direct-write callers Ask through + // a stable cluster proxy without further wiring. The central→site + // Retry/Discard relay now lives in this actor (see the + // RegisterCentralCommunication wiring below); the reconciliation puller + // is the remaining deferred direct-write caller. // Like AuditLogIngestActor, the actor takes the root IServiceProvider // and creates a fresh scope per message because ISiteCallAuditRepository // is a scoped EF Core service. 
diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs index e338b05..8078060 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs @@ -23,9 +23,10 @@ namespace ScadaLink.SiteCallAudit; /// /// /// -/// Query, detail and KPIs land in Task 4; reconciliation and the central→site -/// Retry/Discard relay remain deferred (per CLAUDE.md scope discipline — they -/// land in a later follow-up). +/// Query, detail and KPIs (Task 4) and the central→site Retry/Discard relay +/// (Task 5 — the relay handlers live in this actor) are implemented; only +/// reconciliation remains deferred (per CLAUDE.md scope discipline — it lands +/// in a later follow-up). /// /// /// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" — @@ -550,7 +551,12 @@ public class SiteCallAuditActor : ReceiveActor SiteCallRelayOutcome.NotParked => "The operation is no longer parked at the site (already delivered, discarded, or retrying).", SiteCallRelayOutcome.OperationFailed => ack.ErrorMessage, - _ => ack.ErrorMessage, + // SiteUnreachable is never produced from a ParkedOperationActionAck — + // unreachable responses are built by UnreachableRetry/UnreachableDiscard + // before any ack is classified, so this arm is unreachable by construction. 
+ SiteCallRelayOutcome.SiteUnreachable => ack.ErrorMessage, + _ => throw new ArgumentOutOfRangeException( + nameof(outcome), outcome, "unknown SiteCallRelayOutcome"), }; } From 7e9d74697b756a343e4da589b632e6792fb89f6f Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:51:14 -0400 Subject: [PATCH 12/23] feat(centralui): Site Calls page with Retry/Discard and Audit drill-in --- .../Components/Layout/NavMenu.razor | 13 + .../Pages/SiteCalls/SiteCallsReport.razor | 317 +++++++++++++++ .../Pages/SiteCalls/SiteCallsReport.razor.cs | 369 +++++++++++++++++ .../SiteCalls/SiteCallDataSeeder.cs | 135 +++++++ .../SiteCalls/SiteCallsPageTests.cs | 224 +++++++++++ .../Pages/SiteCallsReportPageTests.cs | 377 ++++++++++++++++++ 6 files changed, 1435 insertions(+) create mode 100644 src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor create mode 100644 src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs create mode 100644 tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallDataSeeder.cs create mode 100644 tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs create mode 100644 tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs diff --git a/src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor b/src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor index 1c05b7e..5dc07e7 100644 --- a/src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor +++ b/src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor @@ -91,6 +91,19 @@ + @* Site Calls — Site Call Audit (#22). Deployment-role only, + matching the Notification Report page's gate; the section + header sits inside the policy block so a non-Deployment + user does not see the heading. *@ + + + + + + + @* Monitoring — Health Dashboard is all-roles; Event Logs and Parked Messages are Deployment-role only (Component-CentralUI). 
*@ diff --git a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor new file mode 100644 index 0000000..019b316 --- /dev/null +++ b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor @@ -0,0 +1,317 @@ +@page "/site-calls/report" +@attribute [Authorize(Policy = ScadaLink.Security.AuthorizationPolicies.RequireDeployment)] +@using ScadaLink.Commons.Entities.Sites +@using ScadaLink.Commons.Interfaces.Repositories +@using ScadaLink.Commons.Messages.Audit +@using ScadaLink.Communication +@inject CommunicationService CommunicationService +@inject ISiteRepository SiteRepository +@inject IDialogService Dialog +@inject ILogger Logger + +
+ + +
+

Site Calls

+ +
+ + @* ── Filters ── *@ +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+
+
+ +
+
+ +
+
+
+
+ + @if (_listError != null) + { +
@_listError
+ } + + @* ── Site call list ── *@ + @if (_siteCalls == null) + { + @if (_loading) + { +
Loading…
+ } + } + else if (_siteCalls.Count == 0) + { +
+
+
No site calls
+
No cached calls match the current filters.
+
+
+ } + else + { +
+ + + + + + + + + + + + + + + + + @foreach (var c in _siteCalls) + { + + + + + + + + + + + + + } + +
Tracked operationSource siteChannelTargetStatusRetriesLast errorCreatedUpdatedActions
@ShortId(c.TrackedOperationId)@SiteName(c.SourceSite)@c.Channel@c.Target + @c.Status + @if (c.IsStuck) + { + Stuck + } + @c.RetryCount + @if (!string.IsNullOrEmpty(c.LastError)) + { +
@c.LastError
+ } + else + { + + } +
+ @* The TrackedOperationId is the audit CorrelationId, so the + link deep-links into the central Audit Log pre-filtered to + this cached call's lifecycle events. *@ + + View audit history + + @* Retry/Discard relay only on Parked rows — central relays the + action to the owning site; Failed and other statuses are not + actionable from central. *@ + @if (c.Status == "Parked") + { + + + } +
+
+ + @* Keyset paging — the Task 4 query response carries a (CreatedAtUtc, Id) + cursor rather than page numbers, so we keep a stack of cursors to step + backwards and the response's NextAfter* cursor to step forwards. *@ +
+ + Page @(_cursorStack.Count + 1) · @_siteCalls.Count rows + +
+ + +
+
+ } +
+ +@* ── Row detail modal ── *@ +@if (_detailSiteCall != null) +{ + var d = _detailSiteCall; + +} diff --git a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs new file mode 100644 index 0000000..2a37606 --- /dev/null +++ b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs @@ -0,0 +1,369 @@ +using Microsoft.Extensions.Logging; +using ScadaLink.CentralUI.Components.Shared; +using ScadaLink.Commons.Entities.Sites; +using ScadaLink.Commons.Messages.Audit; + +namespace ScadaLink.CentralUI.Components.Pages.SiteCalls; + +/// +/// Code-behind for the central Site Calls report page (Site Call Audit #22). A +/// near-mirror of : +/// it queries the central SiteCalls table via +/// , +/// shows a filterable/keyset-paged grid and a detail modal, and relays Retry/Discard +/// of Parked cached calls to their owning site. +/// +/// +/// Unlike the Notification report, the query response uses a (CreatedAtUtc DESC, +/// TrackedOperationId DESC) keyset cursor rather than page numbers, so paging +/// keeps a stack of the cursors that opened each page (to step backwards) plus the +/// response's NextAfter* cursor (to step forwards). +/// +/// +/// +/// Retry/Discard relay to the owning site has a distinct +/// outcome — central is an eventually-consistent mirror, not the source of truth, so +/// a relay that never reaches the site is a transient transport condition, surfaced +/// to the operator differently from a generic failure. +/// +/// +public partial class SiteCallsReport +{ + private const int PageSize = 50; + + private ToastNotification _toast = default!; + private List _sites = new(); + + // List + private List? _siteCalls; + private bool _loading; + private string? _listError; + private bool _actionInProgress; + + // Keyset paging. The first page is opened with the empty (null, null) cursor. 
+ // _cursorStack holds the cursors of the PREVIOUSLY visited pages — it is empty + // on page 1, has one entry on page 2, and so on; Previous pops it. _nextCursor + // is the cursor for the following page, echoed back by the last query. + private readonly Stack<(DateTime? AfterCreatedAtUtc, Guid? AfterId)> _cursorStack = new(); + private (DateTime? AfterCreatedAtUtc, Guid? AfterId) _currentCursor = (null, null); + private (DateTime? AfterCreatedAtUtc, Guid? AfterId)? _nextCursor; + + // Row detail modal + private SiteCallSummary? _detailSiteCall; + private SiteCallDetail? _detail; + private bool _detailLoading; + private string? _detailError; + + // Filters + private string _statusFilter = string.Empty; + private string _channelFilter = string.Empty; + private string _siteFilter = string.Empty; + private string _targetFilter = string.Empty; + private bool _stuckOnly; + private DateTime? _fromFilter; + private DateTime? _toFilter; + + private bool HasNextPage => _nextCursor is not null; + + protected override async Task OnInitializedAsync() + { + try + { + _sites = (await SiteRepository.GetAllSitesAsync()).ToList(); + } + catch (Exception ex) + { + // Non-fatal — the source-site filter just falls back to raw site IDs. + Logger.LogWarning(ex, "Failed to load sites for the Site Calls source-site filter."); + } + + await RefreshAll(); + } + + /// Re-fetch the current page (Refresh button, and after a relay action). + private async Task RefreshAll() + { + await FetchPage(_currentCursor); + } + + /// Apply the filters and start again from the first page. + private async Task Search() + { + _cursorStack.Clear(); + await FetchPage((null, null)); + } + + private async Task PrevPage() + { + if (_cursorStack.Count == 0) + { + return; + } + + // The top of the stack is the cursor of the page BEFORE the current one. 
+ var previousCursor = _cursorStack.Pop(); + await FetchPage(previousCursor); + } + + private async Task NextPage() + { + if (_nextCursor is not { } next) + { + return; + } + + // Stepping forward: remember the current page's cursor so Previous can + // return to it. + _cursorStack.Push(_currentCursor); + await FetchPage(next); + } + + /// + /// Fetch one keyset page starting after . + /// + private async Task FetchPage( + (DateTime? AfterCreatedAtUtc, Guid? AfterId) cursor) + { + _loading = true; + _listError = null; + try + { + var request = new SiteCallQueryRequest( + CorrelationId: Guid.NewGuid().ToString("N"), + StatusFilter: NullIfEmpty(_statusFilter), + SourceSiteFilter: NullIfEmpty(_siteFilter), + ChannelFilter: NullIfEmpty(_channelFilter), + TargetKeyword: NullIfEmpty(_targetFilter), + StuckOnly: _stuckOnly, + FromUtc: ToUtc(_fromFilter), + ToUtc: ToUtc(_toFilter), + AfterCreatedAtUtc: cursor.AfterCreatedAtUtc, + AfterId: cursor.AfterId, + PageSize: PageSize); + + var response = await CommunicationService.QuerySiteCallsAsync(request); + if (response.Success) + { + _siteCalls = response.SiteCalls.ToList(); + _currentCursor = cursor; + + // The response echoes the last row's cursor. A short page (fewer + // rows than requested) has no further page even if a cursor came + // back, so gate Next on a full page too. + _nextCursor = response.NextAfterCreatedAtUtc is { } nextCreated + && response.NextAfterId is { } nextId + && _siteCalls.Count == PageSize + ? (nextCreated, nextId) + : null; + } + else + { + _listError = response.ErrorMessage ?? 
"Query failed."; + } + } + catch (Exception ex) + { + _listError = $"Query failed: {ex.Message}"; + } + _loading = false; + } + + private async Task RetrySiteCall(SiteCallSummary c) + { + var confirmed = await Dialog.ConfirmAsync( + "Retry cached call", + $"Relay a retry of cached call {ShortId(c.TrackedOperationId)} (\"{c.Target}\") " + + $"to site {SiteName(c.SourceSite)}?"); + if (!confirmed) return; + + _actionInProgress = true; + try + { + var response = await CommunicationService.RetrySiteCallAsync( + new RetrySiteCallRequest(Guid.NewGuid().ToString("N"), c.TrackedOperationId, c.SourceSite)); + ShowRelayOutcome(response.Outcome, response.SiteReachable, response.ErrorMessage, + appliedMessage: $"Retry of {ShortId(c.TrackedOperationId)} relayed to {SiteName(c.SourceSite)}."); + if (response.Success) + { + await RefreshAll(); + } + } + catch (Exception ex) + { + _toast.ShowError($"Retry failed: {ex.Message}"); + } + _actionInProgress = false; + } + + private async Task DiscardSiteCall(SiteCallSummary c) + { + var confirmed = await Dialog.ConfirmAsync( + "Discard cached call", + $"Relay a discard of cached call {ShortId(c.TrackedOperationId)} (\"{c.Target}\") " + + $"to site {SiteName(c.SourceSite)}? This cannot be undone.", + danger: true); + if (!confirmed) return; + + _actionInProgress = true; + try + { + var response = await CommunicationService.DiscardSiteCallAsync( + new DiscardSiteCallRequest(Guid.NewGuid().ToString("N"), c.TrackedOperationId, c.SourceSite)); + ShowRelayOutcome(response.Outcome, response.SiteReachable, response.ErrorMessage, + appliedMessage: $"Discard of {ShortId(c.TrackedOperationId)} relayed to {SiteName(c.SourceSite)}."); + if (response.Success) + { + await RefreshAll(); + } + } + catch (Exception ex) + { + _toast.ShowError($"Discard failed: {ex.Message}"); + } + _actionInProgress = false; + } + + /// + /// Surface a relay outcome on the toast. 
The + /// case is deliberately distinct from a generic failure — the action was not + /// applied but the operator can retry once the site is back online. + /// + private void ShowRelayOutcome( + SiteCallRelayOutcome outcome, bool siteReachable, string? errorMessage, string appliedMessage) + { + switch (outcome) + { + case SiteCallRelayOutcome.Applied: + _toast.ShowSuccess(appliedMessage); + break; + case SiteCallRelayOutcome.NotParked: + _toast.ShowInfo(errorMessage + ?? "The site reported nothing to do — the cached call is no longer parked."); + break; + case SiteCallRelayOutcome.SiteUnreachable: + _toast.ShowError(errorMessage + ?? "Site unreachable — the relay did not reach the owning site. " + + "Try again once the site is back online."); + break; + case SiteCallRelayOutcome.OperationFailed: + default: + _toast.ShowError(errorMessage ?? "The site could not apply the action."); + break; + } + + // Defensive: a non-Applied/non-Unreachable outcome that somehow reports an + // unreachable site still gets the unreachable wording. + if (outcome != SiteCallRelayOutcome.SiteUnreachable && !siteReachable + && outcome != SiteCallRelayOutcome.Applied) + { + _toast.ShowError("Site unreachable — the relay did not reach the owning site."); + } + } + + private async Task ShowDetail(SiteCallSummary c) + { + // The summary fields render immediately from the grid row; the full detail + // (HttpStatus, all timestamps, LastError) fills in once the fetch completes. + _detailSiteCall = c; + _detail = null; + _detailError = null; + _detailLoading = true; + StateHasChanged(); + + try + { + var response = await CommunicationService.GetSiteCallDetailAsync( + new SiteCallDetailRequest(Guid.NewGuid().ToString("N"), c.TrackedOperationId)); + if (response.Success && response.Detail != null) + { + _detail = response.Detail; + } + else + { + _detailError = response.ErrorMessage ?? 
"Failed to load site call detail."; + } + } + catch (Exception ex) + { + _detailError = $"Failed to load site call detail: {ex.Message}"; + } + _detailLoading = false; + } + + private void CloseDetail() + { + _detailSiteCall = null; + _detail = null; + _detailError = null; + _detailLoading = false; + } + + private async Task RetryFromDetail(SiteCallSummary c) + { + await RetrySiteCall(c); + // RefreshAll replaces the row list; close the modal so the user sees the + // refreshed grid rather than a now-stale detail snapshot. + CloseDetail(); + } + + private async Task DiscardFromDetail(SiteCallSummary c) + { + await DiscardSiteCall(c); + CloseDetail(); + } + + private void ClearFilters() + { + _statusFilter = string.Empty; + _channelFilter = string.Empty; + _siteFilter = string.Empty; + _targetFilter = string.Empty; + _stuckOnly = false; + _fromFilter = null; + _toFilter = null; + } + + private bool HasActiveFilters => + !string.IsNullOrEmpty(_statusFilter) || + !string.IsNullOrEmpty(_channelFilter) || + !string.IsNullOrEmpty(_siteFilter) || + !string.IsNullOrEmpty(_targetFilter) || + _stuckOnly || + _fromFilter != null || + _toFilter != null; + + private string SiteName(string siteId) => + _sites.FirstOrDefault(s => s.SiteIdentifier == siteId)?.Name ?? siteId; + + private static string? NullIfEmpty(string s) => string.IsNullOrWhiteSpace(s) ? null : s.Trim(); + + /// + /// The filter inputs are UTC wall-clock — stamp + /// on the local-typed value so the query is unambiguous. + /// + private static DateTime? ToUtc(DateTime? value) => + value == null ? null : DateTime.SpecifyKind(value.Value, DateTimeKind.Utc); + + /// + /// The SiteCalls timestamps are UTC ; wrap them as + /// a for TimestampDisplay. + /// + private static DateTimeOffset? AsOffset(DateTime? value) => + value == null + ? 
null + : new DateTimeOffset(DateTime.SpecifyKind(value.Value, DateTimeKind.Utc)); + + private static string ShortId(Guid id) => id.ToString("N")[..12]; + + private static string StatusBadgeClass(string status) => status switch + { + "Delivered" => "bg-success", + "Parked" => "bg-danger", + "Failed" => "bg-danger", + "Attempted" => "bg-warning text-dark", + "Forwarded" => "bg-info text-dark", + "Submitted" => "bg-info text-dark", + "Discarded" => "bg-secondary", + _ => "bg-light text-dark" + }; +} diff --git a/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallDataSeeder.cs b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallDataSeeder.cs new file mode 100644 index 0000000..b05d3f0 --- /dev/null +++ b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallDataSeeder.cs @@ -0,0 +1,135 @@ +using Microsoft.Data.SqlClient; + +namespace ScadaLink.CentralUI.PlaywrightTests.SiteCalls; + +/// +/// Direct-SQL seeding helper for the Site Calls page Playwright E2E tests +/// (Site Call Audit #22, follow-ups Task 6). +/// +/// +/// The Site Calls page reads the central SiteCalls table through the +/// SiteCallAuditActor, which is a pure read-from-table mirror — so a row +/// INSERTed directly into SiteCalls surfaces on the page exactly as a +/// telemetry-ingested row would. Mirrors : +/// each test inserts its own rows at setup and best-effort deletes them at +/// teardown, keeping the suite self-contained without touching +/// infra/mssql/seed-config.sql. +/// +/// +/// +/// Rows are tagged with a unique Target prefix derived from the test name +/// + a GUID so the teardown DELETE never touches rows the cluster itself +/// produced. CreatedAtUtc/UpdatedAtUtc are pinned to "now" so the +/// page's default (unconstrained) query window sees the row. 
+/// +/// +internal static class SiteCallDataSeeder +{ + private const string DefaultConnectionString = + "Server=localhost,1433;Database=ScadaLinkConfig;User Id=scadalink_app;Password=ScadaLink_Dev1#;TrustServerCertificate=true;Encrypt=false;Connect Timeout=5"; + + private const string EnvVar = "SCADALINK_PLAYWRIGHT_DB"; + + /// + /// Connection string for the running cluster's configuration DB. Resolved + /// from SCADALINK_PLAYWRIGHT_DB when set, otherwise the local docker + /// dev defaults. + /// + public static string ConnectionString + { + get + { + var fromEnv = Environment.GetEnvironmentVariable(EnvVar); + return string.IsNullOrWhiteSpace(fromEnv) ? DefaultConnectionString : fromEnv; + } + } + + /// + /// Inserts a single row into the central SiteCalls table. Optional + /// fields are nullable so a test can shape the row to the status/channel it + /// needs for its grid assertions. TrackedOperationId is stored as the + /// 36-character GUID string the entity mapping expects. + /// + public static async Task InsertSiteCallAsync( + Guid trackedOperationId, + string channel, + string target, + string sourceSite, + string status, + int retryCount, + DateTime createdAtUtc, + DateTime updatedAtUtc, + string? lastError = null, + int? httpStatus = null, + DateTime? 
terminalAtUtc = null, + CancellationToken ct = default) + { + const string sql = @" +INSERT INTO [SiteCalls] +([TrackedOperationId], [Channel], [Target], [SourceSite], [Status], [RetryCount], + [LastError], [HttpStatus], [CreatedAtUtc], [UpdatedAtUtc], [TerminalAtUtc], [IngestedAtUtc]) +VALUES +(@id, @channel, @target, @sourceSite, @status, @retryCount, + @lastError, @httpStatus, @createdAtUtc, @updatedAtUtc, @terminalAtUtc, SYSUTCDATETIME());"; + + await using var connection = new SqlConnection(ConnectionString); + await connection.OpenAsync(ct); + await using var cmd = connection.CreateCommand(); + cmd.CommandText = sql; + cmd.Parameters.AddWithValue("@id", trackedOperationId.ToString()); + cmd.Parameters.AddWithValue("@channel", channel); + cmd.Parameters.AddWithValue("@target", target); + cmd.Parameters.AddWithValue("@sourceSite", sourceSite); + cmd.Parameters.AddWithValue("@status", status); + cmd.Parameters.AddWithValue("@retryCount", retryCount); + cmd.Parameters.AddWithValue("@lastError", (object?)lastError ?? DBNull.Value); + cmd.Parameters.AddWithValue("@httpStatus", (object?)httpStatus ?? DBNull.Value); + cmd.Parameters.AddWithValue("@createdAtUtc", createdAtUtc); + cmd.Parameters.AddWithValue("@updatedAtUtc", updatedAtUtc); + cmd.Parameters.AddWithValue("@terminalAtUtc", (object?)terminalAtUtc ?? DBNull.Value); + + await cmd.ExecuteNonQueryAsync(ct); + } + + /// + /// Best-effort cleanup. Deletes every SiteCalls row whose Target + /// starts with . Swallows all errors — the + /// prefix carries a per-run GUID so the rows are unique to this test run. 
+ /// + public static async Task DeleteByTargetPrefixAsync(string targetPrefix, CancellationToken ct = default) + { + try + { + await using var connection = new SqlConnection(ConnectionString); + await connection.OpenAsync(ct); + await using var cmd = connection.CreateCommand(); + cmd.CommandText = "DELETE FROM [SiteCalls] WHERE [Target] LIKE @prefix"; + cmd.Parameters.AddWithValue("@prefix", targetPrefix + "%"); + await cmd.ExecuteNonQueryAsync(ct); + } + catch + { + // Best-effort — the prefix carries a GUID so the rows are unique to + // this test run and won't collide on the next pass. + } + } + + /// + /// Probe whether the configuration DB is reachable. Tests gate their per-test + /// setup on this so a downed cluster surfaces a clear message rather than an + /// opaque . + /// + public static async Task IsAvailableAsync(CancellationToken ct = default) + { + try + { + await using var connection = new SqlConnection(ConnectionString); + await connection.OpenAsync(ct); + return true; + } + catch + { + return false; + } + } +} diff --git a/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs new file mode 100644 index 0000000..6a6c6e9 --- /dev/null +++ b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs @@ -0,0 +1,224 @@ +using Microsoft.Playwright; + +namespace ScadaLink.CentralUI.PlaywrightTests.SiteCalls; + +/// +/// End-to-end coverage for the central Site Calls page (Site Call Audit #22, +/// follow-ups Task 6). +/// +/// +/// Each test seeds its own SiteCalls rows directly into the running +/// cluster's configuration database via , +/// exercises the UI through Playwright, then best-effort deletes the rows by +/// their Target prefix. 
The Site Calls page reads the SiteCalls +/// table through the SiteCallAuditActor (a pure read-from-table mirror), +/// so a directly-INSERTed row surfaces exactly as a telemetry-ingested row +/// would — the same seeding model the Audit Log E2E tests use. The pattern +/// keeps each test self-contained without touching +/// infra/mssql/seed-config.sql. +/// +/// +/// +/// Scenarios covered (per the Task 6 brief): +/// +/// PageLoads — the page renders for a Deployment-role user. +/// FilterNarrowing — a channel filter narrows the results grid. +/// DrillIn — the "View audit history" link deep-links into the +/// Audit Log pre-filtered to the call's TrackedOperationId. +/// RetryDiscardVisibility — Retry/Discard appear only on Parked +/// rows, never on Failed (or other) rows. +/// +/// +/// +[Collection("Playwright")] +public class SiteCallsPageTests +{ + private const string SiteCallsUrl = "/site-calls/report"; + + private readonly PlaywrightFixture _fixture; + + public SiteCallsPageTests(PlaywrightFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task PageLoads_ForDeploymentUser() + { + var page = await _fixture.NewAuthenticatedPageAsync(); + await page.GotoAsync($"{PlaywrightFixture.BaseUrl}{SiteCallsUrl}"); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + Assert.Contains(SiteCallsUrl, page.Url); + await Assertions.Expect(page.Locator("h4:has-text('Site Calls')")).ToBeVisibleAsync(); + // The filter card's Query button is the page's primary action. 
+ await Assertions.Expect(page.Locator("[data-test='site-calls-query']")).ToBeVisibleAsync(); + } + + [Fact] + public async Task FilterNarrowing_ChannelFilterShrinksGrid() + { + if (!await SiteCallDataSeeder.IsAvailableAsync()) + { + throw new InvalidOperationException( + "SiteCallDataSeeder cannot reach MSSQL at localhost:1433 — bring up infra/docker-compose and docker/deploy.sh, " + + "or set SCADALINK_PLAYWRIGHT_DB to a reachable connection string."); + } + + var runId = Guid.NewGuid().ToString("N"); + var targetPrefix = $"playwright-test/sc-filter/{runId}/"; + var apiId = Guid.NewGuid(); + var dbId = Guid.NewGuid(); + var now = DateTime.UtcNow; + + try + { + // One ApiOutbound row, one DbOutbound row — distinct Targets. + await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: apiId, channel: "ApiOutbound", target: targetPrefix + "api", + sourceSite: "plant-a", status: "Delivered", retryCount: 0, + createdAtUtc: now, updatedAtUtc: now, httpStatus: 200, terminalAtUtc: now); + await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: dbId, channel: "DbOutbound", target: targetPrefix + "db", + sourceSite: "plant-a", status: "Delivered", retryCount: 0, + createdAtUtc: now, updatedAtUtc: now, terminalAtUtc: now); + + var page = await _fixture.NewAuthenticatedPageAsync(); + await page.GotoAsync($"{PlaywrightFixture.BaseUrl}{SiteCallsUrl}"); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + // Unfiltered query: both seeded rows appear (the Target keyword scopes + // to this run so unrelated cluster rows do not interfere). + await page.Locator("#sc-search").FillAsync(targetPrefix + "api"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + // Only the ApiOutbound row matches the exact target keyword. 
+ await Assertions.Expect(page.Locator($"text={targetPrefix}api")).ToBeVisibleAsync(); + Assert.Equal(0, await page.Locator($"text={targetPrefix}db").CountAsync()); + + // Now filter by Channel = DbOutbound with the db target — the row flips. + await page.Locator("#sc-search").FillAsync(targetPrefix + "db"); + await page.Locator("#sc-channel").SelectOptionAsync("DbOutbound"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + await Assertions.Expect(page.Locator($"text={targetPrefix}db")).ToBeVisibleAsync(); + Assert.Equal(0, await page.Locator($"text={targetPrefix}api").CountAsync()); + } + finally + { + await SiteCallDataSeeder.DeleteByTargetPrefixAsync(targetPrefix); + } + } + + [Fact] + public async Task DrillIn_ViewAuditHistory_NavigatesToPreFilteredAuditLog() + { + if (!await SiteCallDataSeeder.IsAvailableAsync()) + { + throw new InvalidOperationException("MSSQL unavailable; see FilterNarrowing test for setup instructions."); + } + + var runId = Guid.NewGuid().ToString("N"); + var targetPrefix = $"playwright-test/sc-drill-in/{runId}/"; + var trackedId = Guid.NewGuid(); + var now = DateTime.UtcNow; + + try + { + await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: trackedId, channel: "ApiOutbound", target: targetPrefix + "endpoint", + sourceSite: "plant-a", status: "Delivered", retryCount: 0, + createdAtUtc: now, updatedAtUtc: now, httpStatus: 200, terminalAtUtc: now); + + var page = await _fixture.NewAuthenticatedPageAsync(); + await page.GotoAsync($"{PlaywrightFixture.BaseUrl}{SiteCallsUrl}"); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + await page.Locator("#sc-search").FillAsync(targetPrefix + "endpoint"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + // The row carries a "View audit history" link whose href is the + // canonical correlationId deep-link — the 
TrackedOperationId IS the + // audit CorrelationId. + var link = page.Locator($"a[data-test='audit-link-{trackedId}']"); + await Assertions.Expect(link).ToBeVisibleAsync(); + var href = await link.GetAttributeAsync("href"); + Assert.Equal($"/audit/log?correlationId={trackedId}", href); + + // Following the link lands on the Audit Log page with the query-string + // drill-in context intact. + await link.ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + Assert.Contains($"correlationId={trackedId}", page.Url); + await Assertions.Expect(page.Locator("h1:has-text('Audit Log')")).ToBeVisibleAsync(); + } + finally + { + await SiteCallDataSeeder.DeleteByTargetPrefixAsync(targetPrefix); + } + } + + [Fact] + public async Task RetryDiscard_VisibleOnlyOnParkedRows() + { + if (!await SiteCallDataSeeder.IsAvailableAsync()) + { + throw new InvalidOperationException("MSSQL unavailable; see FilterNarrowing test for setup instructions."); + } + + var runId = Guid.NewGuid().ToString("N"); + var targetPrefix = $"playwright-test/sc-actions/{runId}/"; + var parkedId = Guid.NewGuid(); + var failedId = Guid.NewGuid(); + var now = DateTime.UtcNow; + + try + { + // One Parked row (actionable) and one Failed row (terminal — not + // actionable from central). 
+ await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: parkedId, channel: "ApiOutbound", target: targetPrefix + "parked", + sourceSite: "plant-a", status: "Parked", retryCount: 3, + lastError: "HTTP 503 from ERP", httpStatus: 503, + createdAtUtc: now, updatedAtUtc: now); + await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: failedId, channel: "DbOutbound", target: targetPrefix + "failed", + sourceSite: "plant-a", status: "Failed", retryCount: 1, + lastError: "constraint violation", + createdAtUtc: now, updatedAtUtc: now, terminalAtUtc: now); + + var page = await _fixture.NewAuthenticatedPageAsync(); + await page.GotoAsync($"{PlaywrightFixture.BaseUrl}{SiteCallsUrl}"); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + // Query the parked row first. + await page.Locator("#sc-search").FillAsync(targetPrefix + "parked"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + var parkedRow = page.Locator("tbody tr", new() { HasText = targetPrefix + "parked" }); + await Assertions.Expect(parkedRow).ToBeVisibleAsync(); + // The Parked row exposes both Retry and Discard. + await Assertions.Expect(parkedRow.Locator("button:has-text('Retry')")).ToBeVisibleAsync(); + await Assertions.Expect(parkedRow.Locator("button:has-text('Discard')")).ToBeVisibleAsync(); + + // Now the Failed row — Retry/Discard are absent. 
+ await page.Locator("#sc-search").FillAsync(targetPrefix + "failed"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + var failedRow = page.Locator("tbody tr", new() { HasText = targetPrefix + "failed" }); + await Assertions.Expect(failedRow).ToBeVisibleAsync(); + Assert.Equal(0, await failedRow.Locator("button:has-text('Retry')").CountAsync()); + Assert.Equal(0, await failedRow.Locator("button:has-text('Discard')").CountAsync()); + } + finally + { + await SiteCallDataSeeder.DeleteByTargetPrefixAsync(targetPrefix); + } + } +} diff --git a/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs new file mode 100644 index 0000000..b665cc4 --- /dev/null +++ b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs @@ -0,0 +1,377 @@ +using System.Security.Claims; +using Akka.Actor; +using Bunit; +using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Components.Authorization; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using NSubstitute; +using ScadaLink.CentralUI.Components.Shared; +using ScadaLink.Commons.Entities.Sites; +using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Messages.Audit; +using ScadaLink.Communication; +using ScadaLink.Security; +using SiteCallsReportPage = ScadaLink.CentralUI.Components.Pages.SiteCalls.SiteCallsReport; + +namespace ScadaLink.CentralUI.Tests.Pages; + +/// +/// bUnit rendering tests for the Site Calls report page (Site Call Audit #22). +/// +/// Testability note: is a concrete class with +/// non-virtual methods, so NSubstitute cannot intercept it. 
The page's calls all +/// route through an injected (the Site Call Audit proxy), +/// so the tests wire a real, lightweight with a scripted +/// that replies with fixed responses — the same seam +/// SetSiteCallAudit exists for. Mirrors . +/// +public class SiteCallsReportPageTests : BunitContext +{ + private readonly ActorSystem _system = ActorSystem.Create("site-calls-report-tests"); + private readonly CommunicationService _comms; + + private static readonly Guid ParkedId = Guid.Parse("11111111-1111-1111-1111-111111111111"); + private static readonly Guid FailedId = Guid.Parse("22222222-2222-2222-2222-222222222222"); + + // Mutable scripted reply — individual tests can override before rendering. + private SiteCallQueryResponse _queryReply = new( + "q", true, null, + new List + { + new(ParkedId, "plant-a", "ApiOutbound", "ERP.GetOrder", "Parked", + RetryCount: 3, LastError: "HTTP 503 from ERP", HttpStatus: 503, + CreatedAtUtc: DateTime.UtcNow.AddMinutes(-30), UpdatedAtUtc: DateTime.UtcNow.AddMinutes(-5), + TerminalAtUtc: null, IsStuck: true), + new(FailedId, "plant-b", "DbOutbound", "Historian.Write", "Failed", + RetryCount: 1, LastError: "constraint violation", HttpStatus: null, + CreatedAtUtc: DateTime.UtcNow.AddHours(-2), UpdatedAtUtc: DateTime.UtcNow.AddHours(-2), + TerminalAtUtc: DateTime.UtcNow.AddHours(-2), IsStuck: false), + }, + NextAfterCreatedAtUtc: null, + NextAfterId: null); + + // Records the most recent retry/discard requests the actor received. + private readonly List _queryRequests = new(); + private readonly List _retryRequests = new(); + private readonly List _discardRequests = new(); + + // Scripted relay responses — overridable per test. 
+ private RetrySiteCallResponse _retryReply = + new("q", SiteCallRelayOutcome.Applied, true, true, null); + private DiscardSiteCallResponse _discardReply = + new("q", SiteCallRelayOutcome.Applied, true, true, null); + + public SiteCallsReportPageTests() + { + _comms = new CommunicationService( + Options.Create(new CommunicationOptions()), + NullLogger.Instance); + + var auditProxy = _system.ActorOf(Props.Create(() => new ScriptedSiteCallAuditActor(this))); + _comms.SetSiteCallAudit(auditProxy); + + Services.AddSingleton(_comms); + Services.AddSingleton(new AlwaysConfirmDialogService()); + + var siteRepo = Substitute.For(); + siteRepo.GetAllSitesAsync(Arg.Any()) + .Returns(Task.FromResult>(new List + { + new("Plant A", "plant-a") { Id = 1 }, + new("Plant B", "plant-b") { Id = 2 }, + })); + Services.AddSingleton(siteRepo); + + var claims = new[] + { + new Claim("Username", "tester"), + new Claim(ClaimTypes.Role, "Deployment"), + }; + var user = new ClaimsPrincipal(new ClaimsIdentity(claims, "TestAuth")); + Services.AddSingleton(new TestAuthStateProvider(user)); + Services.AddAuthorizationCore(); + } + + [Fact] + public void Page_RequiresDeploymentPolicy() + { + var attr = typeof(SiteCallsReportPage) + .GetCustomAttributes(typeof(AuthorizeAttribute), true) + .Cast() + .FirstOrDefault(); + + Assert.NotNull(attr); + Assert.Equal(AuthorizationPolicies.RequireDeployment, attr!.Policy); + } + + [Fact] + public void Renders_SiteCallRows() + { + var cut = Render(); + + cut.WaitForAssertion(() => + { + Assert.Contains("ERP.GetOrder", cut.Markup); + Assert.Contains("Historian.Write", cut.Markup); + }); + } + + [Fact] + public void StuckRow_IsBadged() + { + var cut = Render(); + + cut.WaitForAssertion(() => + { + var stuckRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + Assert.Contains("badge", stuckRow.InnerHtml); + Assert.Contains("Stuck", stuckRow.TextContent); + }); + } + + [Fact] + public void 
RetryDiscardButtons_ShownOnlyOnParkedRows() + { + var cut = Render(); + + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + var failedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("Historian.Write")); + + // The Parked row carries Retry + Discard buttons. + Assert.Contains(parkedRow.QuerySelectorAll("button"), + b => b.TextContent.Contains("Retry")); + Assert.Contains(parkedRow.QuerySelectorAll("button"), + b => b.TextContent.Contains("Discard")); + + // The Failed row carries neither — Retry/Discard are Parked-only. + Assert.DoesNotContain(failedRow.QuerySelectorAll("button"), + b => b.TextContent.Contains("Retry")); + Assert.DoesNotContain(failedRow.QuerySelectorAll("button"), + b => b.TextContent.Contains("Discard")); + } + + [Fact] + public void ClickRetry_OnParkedRow_RelaysRetryToOwningSite() + { + var cut = Render(); + + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + var retryButton = parkedRow.QuerySelectorAll("button") + .First(b => b.TextContent.Contains("Retry")); + + retryButton.Click(); + + cut.WaitForAssertion(() => + { + Assert.Single(_retryRequests); + Assert.Equal(ParkedId, _retryRequests[0].TrackedOperationId); + // The relay carries the owning site so central can route it. 
+ Assert.Equal("plant-a", _retryRequests[0].SourceSite); + }); + } + + [Fact] + public void ClickDiscard_OnParkedRow_RelaysDiscardToOwningSite() + { + var cut = Render(); + + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + var discardButton = parkedRow.QuerySelectorAll("button") + .First(b => b.TextContent.Contains("Discard")); + + discardButton.Click(); + + cut.WaitForAssertion(() => + { + Assert.Single(_discardRequests); + Assert.Equal(ParkedId, _discardRequests[0].TrackedOperationId); + Assert.Equal("plant-a", _discardRequests[0].SourceSite); + }); + } + + [Fact] + public void RetryRelay_SiteUnreachable_ShowsDistinctMessage() + { + // The relay never reached the owning site — a transient transport + // condition, surfaced distinctly from a generic failure. + _retryReply = new RetrySiteCallResponse( + "q", SiteCallRelayOutcome.SiteUnreachable, Success: false, SiteReachable: false, + ErrorMessage: "Site plant-a is offline — relay not delivered."); + + var cut = Render(); + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + parkedRow.QuerySelectorAll("button") + .First(b => b.TextContent.Contains("Retry")) + .Click(); + + cut.WaitForAssertion(() => + Assert.Contains("offline", cut.Markup)); + } + + [Fact] + public void QueryFailure_ShowsErrorMessage() + { + _queryReply = new SiteCallQueryResponse( + "q", false, "site call query backend unavailable", + new List(), null, null); + + var cut = Render(); + + cut.WaitForAssertion(() => + Assert.Contains("site call query backend unavailable", cut.Markup)); + } + + // ───────────────────────────────────────────────────────────────────────── + // Drill-in — every row carries a "View audit history" link to + // /audit/log?correlationId={TrackedOperationId}. 
+ // ───────────────────────────────────────────────────────────────────────── + + [Fact] + public void SiteCallRow_ViewAuditHistory_Link_HasCorrectHref() + { + var cut = Render(); + + cut.WaitForAssertion(() => + { + // Both rows (Parked + Failed) surface the link — the drill-in is + // row-scope, not status-scope. + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + var link = parkedRow.QuerySelector("a[data-test^=\"audit-link-\"]"); + Assert.NotNull(link); + Assert.Equal( + $"/audit/log?correlationId={ParkedId}", + link!.GetAttribute("href")); + Assert.Contains("View audit history", link.TextContent); + + var failedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("Historian.Write")); + var failedLink = failedRow.QuerySelector("a[data-test^=\"audit-link-\"]"); + Assert.NotNull(failedLink); + Assert.Equal( + $"/audit/log?correlationId={FailedId}", + failedLink!.GetAttribute("href")); + }); + } + + // ───────────────────────────────────────────────────────────────────────── + // Keyset paging — Next is driven by the response's NextAfter* cursor, not by + // page numbers; the request echoes the cursor back to the actor. + // ───────────────────────────────────────────────────────────────────────── + + [Fact] + public void Paging_NextButton_HiddenWhenNoFurtherPage() + { + // The default reply returns 2 rows and no NextAfter* cursor — there is no + // further page, so Next is disabled. + var cut = Render(); + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var next = cut.Find("[data-test='site-calls-next']"); + Assert.True(next.HasAttribute("disabled")); + var prev = cut.Find("[data-test='site-calls-prev']"); + Assert.True(prev.HasAttribute("disabled")); + } + + [Fact] + public void Paging_NextButton_AdvancesUsingKeysetCursor() + { + // A full page (PageSize=50 rows) plus a NextAfter* cursor: Next is live + // and, when clicked, the follow-up query carries that cursor. 
+ var firstPage = new List(); + for (var i = 0; i < 50; i++) + { + firstPage.Add(new SiteCallSummary( + Guid.NewGuid(), "plant-a", "ApiOutbound", $"ERP.Op{i}", "Delivered", + RetryCount: 0, LastError: null, HttpStatus: 200, + CreatedAtUtc: DateTime.UtcNow.AddMinutes(-i), UpdatedAtUtc: DateTime.UtcNow.AddMinutes(-i), + TerminalAtUtc: DateTime.UtcNow.AddMinutes(-i), IsStuck: false)); + } + + var cursorCreated = new DateTime(2026, 5, 20, 12, 0, 0, DateTimeKind.Utc); + var cursorId = Guid.Parse("99999999-9999-9999-9999-999999999999"); + _queryReply = new SiteCallQueryResponse( + "q", true, null, firstPage, + NextAfterCreatedAtUtc: cursorCreated, + NextAfterId: cursorId); + + var cut = Render(); + cut.WaitForState(() => cut.Markup.Contains("ERP.Op0")); + + var next = cut.Find("[data-test='site-calls-next']"); + Assert.False(next.HasAttribute("disabled")); + + next.Click(); + + cut.WaitForAssertion(() => + { + // Two queries fired: the initial load and the Next click. The second + // carries the keyset cursor echoed by the first response. + Assert.Equal(2, _queryRequests.Count); + Assert.Equal(cursorCreated, _queryRequests[1].AfterCreatedAtUtc); + Assert.Equal(cursorId, _queryRequests[1].AfterId); + }); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + _system.Terminate().Wait(TimeSpan.FromSeconds(5)); + } + base.Dispose(disposing); + } + + /// + /// Stand-in for the Site Call Audit actor. Replies to each message type with + /// the test's currently-scripted response. 
+ /// + private sealed class ScriptedSiteCallAuditActor : ReceiveActor + { + public ScriptedSiteCallAuditActor(SiteCallsReportPageTests test) + { + Receive(r => + { + test._queryRequests.Add(r); + Sender.Tell(test._queryReply); + }); + Receive(r => + { + test._retryRequests.Add(r); + Sender.Tell(test._retryReply); + }); + Receive(r => + { + test._discardRequests.Add(r); + Sender.Tell(test._discardReply); + }); + } + } + + /// A dialog service that auto-confirms, so action paths run end-to-end. + private sealed class AlwaysConfirmDialogService : IDialogService + { + public Task ConfirmAsync(string title, string message, bool danger = false) + => Task.FromResult(true); + + public Task PromptAsync( + string title, string label, string initialValue = "", string? placeholder = null) + => Task.FromResult(null); + } +} From d73b459057a2d9e796a120a1adbe8657966e7522 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 04:59:12 -0400 Subject: [PATCH 13/23] fix(centralui): single relay toast, paging/skip polish, extra Site Calls tests --- .../Pages/SiteCalls/SiteCallsReport.razor | 3 + .../Pages/SiteCalls/SiteCallsReport.razor.cs | 34 ++++--- ...ScadaLink.CentralUI.PlaywrightTests.csproj | 7 ++ .../SiteCalls/SiteCallsPageTests.cs | 92 +++++++++++++++---- .../Pages/SiteCallsReportPageTests.cs | 82 +++++++++++++++++ 5 files changed, 190 insertions(+), 28 deletions(-) diff --git a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor index 019b316..30ece7b 100644 --- a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor +++ b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor @@ -201,6 +201,9 @@ backwards and the response's NextAfter* cursor to step forwards. *@
+ @* No "of N" total: keyset paging has no cheap total-count, so + the label is intentionally page-number-only. Do not "fix" + this by adding a total — that would require a COUNT(*). *@ Page @(_cursorStack.Count + 1) · @_siteCalls.Count rows
diff --git a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs index 2a37606..079c009 100644 --- a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs +++ b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs @@ -224,10 +224,20 @@ public partial class SiteCallsReport } /// - /// Surface a relay outcome on the toast. The - /// case is deliberately distinct from a generic failure — the action was not - /// applied but the operator can retry once the site is back online. + /// Surface a relay outcome on the toast — exactly one toast per relay + /// response. The case is + /// deliberately distinct from a generic failure: the action was not applied + /// but the operator can retry once the site is back online. /// + /// + /// The switch is exhaustive, so it owns + /// the single toast. is a redundant + /// cross-check on the same signal (the contract sets it false only + /// for ); it is folded + /// INTO the case rather + /// than firing a second toast — an OperationFailed response that also + /// reports an unreachable site shows the unreachable wording, once. + /// private void ShowRelayOutcome( SiteCallRelayOutcome outcome, bool siteReachable, string? errorMessage, string appliedMessage) { @@ -245,19 +255,19 @@ public partial class SiteCallsReport ?? "Site unreachable — the relay did not reach the owning site. " + "Try again once the site is back online."); break; + case SiteCallRelayOutcome.OperationFailed when !siteReachable: + // An OperationFailed response that nonetheless reports the site + // unreachable: trust the reachability signal and show the + // unreachable wording instead of the generic failure message. + _toast.ShowError(errorMessage + ?? "Site unreachable — the relay did not reach the owning site. 
" + + "Try again once the site is back online."); + break; case SiteCallRelayOutcome.OperationFailed: default: _toast.ShowError(errorMessage ?? "The site could not apply the action."); break; } - - // Defensive: a non-Applied/non-Unreachable outcome that somehow reports an - // unreachable site still gets the unreachable wording. - if (outcome != SiteCallRelayOutcome.SiteUnreachable && !siteReachable - && outcome != SiteCallRelayOutcome.Applied) - { - _toast.ShowError("Site unreachable — the relay did not reach the owning site."); - } } private async Task ShowDetail(SiteCallSummary c) @@ -353,6 +363,8 @@ public partial class SiteCallsReport ? null : new DateTimeOffset(DateTime.SpecifyKind(value.Value, DateTimeKind.Utc)); + // A Guid's "N" format is always exactly 32 hex chars, so the [..12] slice is + // always in range — no length guard needed. private static string ShortId(Guid id) => id.ToString("N")[..12]; private static string StatusBadgeClass(string status) => status switch diff --git a/tests/ScadaLink.CentralUI.PlaywrightTests/ScadaLink.CentralUI.PlaywrightTests.csproj b/tests/ScadaLink.CentralUI.PlaywrightTests/ScadaLink.CentralUI.PlaywrightTests.csproj index dd87843..e89e456 100644 --- a/tests/ScadaLink.CentralUI.PlaywrightTests/ScadaLink.CentralUI.PlaywrightTests.csproj +++ b/tests/ScadaLink.CentralUI.PlaywrightTests/ScadaLink.CentralUI.PlaywrightTests.csproj @@ -15,6 +15,13 @@ + + diff --git a/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs index 6a6c6e9..242c197 100644 --- a/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs +++ b/tests/ScadaLink.CentralUI.PlaywrightTests/SiteCalls/SiteCallsPageTests.cs @@ -1,4 +1,5 @@ using Microsoft.Playwright; +using Xunit; namespace ScadaLink.CentralUI.PlaywrightTests.SiteCalls; @@ -27,8 +28,16 @@ namespace ScadaLink.CentralUI.PlaywrightTests.SiteCalls; /// Audit Log pre-filtered to the call's 
TrackedOperationId. /// RetryDiscardVisibility — Retry/Discard appear only on Parked /// rows, never on Failed (or other) rows. +/// RetryClickThrough — clicking Retry on a Parked row confirms +/// the dialog, relays to the owning site, and surfaces an outcome toast. /// /// +/// +/// +/// The DB-seeding tests are + Skip.IfNot: +/// when the cluster / MSSQL is unreachable they report as Skipped (not Failed), +/// matching the established ScadaLink.ConfigurationDatabase.Tests idiom. +/// ///
[Collection("Playwright")] public class SiteCallsPageTests @@ -55,15 +64,15 @@ public class SiteCallsPageTests await Assertions.Expect(page.Locator("[data-test='site-calls-query']")).ToBeVisibleAsync(); } - [Fact] + /// Skip reason shared by the DB-seeding tests when MSSQL is down. + private const string DbUnavailableSkipReason = + "SiteCallDataSeeder cannot reach MSSQL at localhost:1433 — bring up infra/docker-compose and docker/deploy.sh, " + + "or set SCADALINK_PLAYWRIGHT_DB to a reachable connection string."; + + [SkippableFact] public async Task FilterNarrowing_ChannelFilterShrinksGrid() { - if (!await SiteCallDataSeeder.IsAvailableAsync()) - { - throw new InvalidOperationException( - "SiteCallDataSeeder cannot reach MSSQL at localhost:1433 — bring up infra/docker-compose and docker/deploy.sh, " + - "or set SCADALINK_PLAYWRIGHT_DB to a reachable connection string."); - } + Skip.IfNot(await SiteCallDataSeeder.IsAvailableAsync(), DbUnavailableSkipReason); var runId = Guid.NewGuid().ToString("N"); var targetPrefix = $"playwright-test/sc-filter/{runId}/"; @@ -112,13 +121,10 @@ public class SiteCallsPageTests } } - [Fact] + [SkippableFact] public async Task DrillIn_ViewAuditHistory_NavigatesToPreFilteredAuditLog() { - if (!await SiteCallDataSeeder.IsAvailableAsync()) - { - throw new InvalidOperationException("MSSQL unavailable; see FilterNarrowing test for setup instructions."); - } + Skip.IfNot(await SiteCallDataSeeder.IsAvailableAsync(), DbUnavailableSkipReason); var runId = Guid.NewGuid().ToString("N"); var targetPrefix = $"playwright-test/sc-drill-in/{runId}/"; @@ -162,13 +168,10 @@ public class SiteCallsPageTests } } - [Fact] + [SkippableFact] public async Task RetryDiscard_VisibleOnlyOnParkedRows() { - if (!await SiteCallDataSeeder.IsAvailableAsync()) - { - throw new InvalidOperationException("MSSQL unavailable; see FilterNarrowing test for setup instructions."); - } + Skip.IfNot(await SiteCallDataSeeder.IsAvailableAsync(), DbUnavailableSkipReason); var runId 
= Guid.NewGuid().ToString("N"); var targetPrefix = $"playwright-test/sc-actions/{runId}/"; @@ -221,4 +224,59 @@ public class SiteCallsPageTests await SiteCallDataSeeder.DeleteByTargetPrefixAsync(targetPrefix); } } + + [SkippableFact] + public async Task RetryClickThrough_OnParkedRow_ConfirmsRelayAndShowsOutcomeToast() + { + Skip.IfNot(await SiteCallDataSeeder.IsAvailableAsync(), DbUnavailableSkipReason); + + var runId = Guid.NewGuid().ToString("N"); + var targetPrefix = $"playwright-test/sc-retry-click/{runId}/"; + var parkedId = Guid.NewGuid(); + var now = DateTime.UtcNow; + + try + { + // A single Parked row — the only status from which Retry/Discard can + // be relayed to the owning site. + await SiteCallDataSeeder.InsertSiteCallAsync( + trackedOperationId: parkedId, channel: "ApiOutbound", target: targetPrefix + "parked", + sourceSite: "plant-a", status: "Parked", retryCount: 3, + lastError: "HTTP 503 from ERP", httpStatus: 503, + createdAtUtc: now, updatedAtUtc: now); + + var page = await _fixture.NewAuthenticatedPageAsync(); + await page.GotoAsync($"{PlaywrightFixture.BaseUrl}{SiteCallsUrl}"); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + await page.Locator("#sc-search").FillAsync(targetPrefix + "parked"); + await page.Locator("[data-test='site-calls-query']").ClickAsync(); + await page.WaitForLoadStateAsync(LoadState.NetworkIdle); + + var parkedRow = page.Locator("tbody tr", new() { HasText = targetPrefix + "parked" }); + await Assertions.Expect(parkedRow).ToBeVisibleAsync(); + + // Click Retry — this opens the confirmation dialog (DialogHost modal). + await parkedRow.Locator("button:has-text('Retry')").ClickAsync(); + + // Confirm the relay in the dialog footer ("Confirm" — the non-danger + // label; Discard would render "Delete"). 
+ var confirmButton = page.Locator(".modal-footer button:has-text('Confirm')"); + await Assertions.Expect(confirmButton).ToBeVisibleAsync(); + await confirmButton.ClickAsync(); + + // The relay outcome surfaces on a toast — Applied, NotParked or, if + // the owning site is offline in this environment, SiteUnreachable. + // We only assert that an outcome toast appears (exactly one — the + // single-toast contract), not which one, since the live cluster + // state determines the outcome. + var toast = page.Locator(".toast"); + await Assertions.Expect(toast).ToBeVisibleAsync(); + Assert.Equal(1, await toast.CountAsync()); + } + finally + { + await SiteCallDataSeeder.DeleteByTargetPrefixAsync(targetPrefix); + } + } } diff --git a/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs index b665cc4..e4fa93d 100644 --- a/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs +++ b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs @@ -329,6 +329,88 @@ public class SiteCallsReportPageTests : BunitContext }); } + [Fact] + public void Paging_PrevButton_PopsBackStackAndRefetchesPriorCursor() + { + // The keyset back-stack is the trickiest paging path: Next pushes the + // current cursor, Prev pops it and refetches that prior page. Page 1 is + // opened with the empty (null, null) cursor, so after Next→Previous the + // follow-up query must carry (null, null) again. 
+ var firstPage = new List(); + for (var i = 0; i < 50; i++) + { + firstPage.Add(new SiteCallSummary( + Guid.NewGuid(), "plant-a", "ApiOutbound", $"ERP.Op{i}", "Delivered", + RetryCount: 0, LastError: null, HttpStatus: 200, + CreatedAtUtc: DateTime.UtcNow.AddMinutes(-i), UpdatedAtUtc: DateTime.UtcNow.AddMinutes(-i), + TerminalAtUtc: DateTime.UtcNow.AddMinutes(-i), IsStuck: false)); + } + + var cursorCreated = new DateTime(2026, 5, 20, 12, 0, 0, DateTimeKind.Utc); + var cursorId = Guid.Parse("99999999-9999-9999-9999-999999999999"); + _queryReply = new SiteCallQueryResponse( + "q", true, null, firstPage, + NextAfterCreatedAtUtc: cursorCreated, + NextAfterId: cursorId); + + var cut = Render(); + cut.WaitForState(() => cut.Markup.Contains("ERP.Op0")); + + // Step forward — query 2 carries the keyset cursor. + var next = cut.Find("[data-test='site-calls-next']"); + next.Click(); + cut.WaitForAssertion(() => + { + Assert.Equal(2, _queryRequests.Count); + Assert.Equal(cursorCreated, _queryRequests[1].AfterCreatedAtUtc); + }); + + // Previous is now live (the back-stack has one entry); click it. + var prev = cut.Find("[data-test='site-calls-prev']"); + Assert.False(prev.HasAttribute("disabled")); + prev.Click(); + + cut.WaitForAssertion(() => + { + // Query 3 is the Previous refetch — the back-stack popped the page-1 + // cursor, which is the empty (null, null) first-page cursor. + Assert.Equal(3, _queryRequests.Count); + Assert.Null(_queryRequests[2].AfterCreatedAtUtc); + Assert.Null(_queryRequests[2].AfterId); + // Back on page 1, the back-stack is empty again so Previous re-disables. + Assert.True(cut.Find("[data-test='site-calls-prev']").HasAttribute("disabled")); + }); + } + + [Fact] + public void RetryRelay_NotParked_ShowsInfoMessage_AndExactlyOneToast() + { + // NotParked is a definitive answer from the site (nothing to do), not a + // failure — it surfaces as a single info toast, never an error. 
This + // also guards the single-toast contract: a non-Applied outcome must + // produce exactly one toast. + _retryReply = new RetrySiteCallResponse( + "q", SiteCallRelayOutcome.NotParked, Success: false, SiteReachable: true, + ErrorMessage: "The cached call is no longer parked."); + + var cut = Render(); + cut.WaitForState(() => cut.Markup.Contains("ERP.GetOrder")); + + var parkedRow = cut.FindAll("tbody tr") + .First(r => r.TextContent.Contains("ERP.GetOrder")); + parkedRow.QuerySelectorAll("button") + .First(b => b.TextContent.Contains("Retry")) + .Click(); + + cut.WaitForAssertion(() => + { + Assert.Contains("no longer parked", cut.Markup); + // Exactly one toast — the ShowRelayOutcome switch owns the single + // toast; no second (error) toast piggybacks on the same response. + Assert.Single(cut.FindAll(".toast")); + }); + } + protected override void Dispose(bool disposing) { if (disposing) From 44f1ee372a875c28023e5b39166c7d512be4c0cf Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 05:04:16 -0400 Subject: [PATCH 14/23] feat(centralui): Site Call KPI tiles on the Health dashboard --- .../Components/Health/SiteCallKpiTiles.razor | 60 ++++++ .../Health/SiteCallKpiTiles.razor.cs | 130 +++++++++++++ .../Components/Pages/Monitoring/Health.razor | 45 +++++ .../Health/SiteCallKpiTilesTests.cs | 177 ++++++++++++++++++ .../Pages/HealthPageTests.cs | 70 +++++++ 5 files changed, 482 insertions(+) create mode 100644 src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor create mode 100644 src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor.cs create mode 100644 tests/ScadaLink.CentralUI.Tests/Components/Health/SiteCallKpiTilesTests.cs diff --git a/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor b/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor new file mode 100644 index 0000000..624e4a2 --- /dev/null +++ b/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor @@ -0,0 +1,60 @@ +@* + 
Site Call Audit (#22) Task 7 — three Health-dashboard KPI tiles for the + Site Call channel: Buffered / Stuck / Parked. Renders Bootstrap card tiles + in a single row, each acting as a navigation link to a pre-filtered Site + Calls report view. The component is purely presentational — the parent page + owns the refresh loop and passes the latest snapshot via the Snapshot + parameter. Mirrors AuditKpiTiles and the Notification Outbox KPI section. +*@ + +@namespace ScadaLink.CentralUI.Components.Health +@inject NavigationManager Navigation + +
+
Site Calls
+ View details → +
+
+ @* ── Buffered tile ─────────────────────────────────────────────────────── *@ +
+ +
+ + @* ── Stuck tile ────────────────────────────────────────────────────────── *@ +
+ +
+ + @* ── Parked tile ───────────────────────────────────────────────────────── *@ +
+ +
+
+@if (!IsAvailable && !string.IsNullOrEmpty(ErrorMessage)) +{ +
Site Call KPIs unavailable: @ErrorMessage
+} diff --git a/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor.cs b/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor.cs new file mode 100644 index 0000000..1ed9a9a --- /dev/null +++ b/src/ScadaLink.CentralUI/Components/Health/SiteCallKpiTiles.razor.cs @@ -0,0 +1,130 @@ +using Microsoft.AspNetCore.Components; +using ScadaLink.Commons.Messages.Audit; + +namespace ScadaLink.CentralUI.Components.Health; + +/// +/// Site Call Audit (#22) Task 7 code-behind for . +/// Renders three KPI tiles — Buffered, Stuck, Parked — from a +/// the parent Health dashboard supplies. +/// Tiles act as drill-in links: clicking navigates to /site-calls/report +/// with the relevant query-string filter pre-applied. Mirrors +/// and the Notification Outbox KPI section on the +/// Health dashboard. +/// +/// +/// +/// Why purely presentational. The Health dashboard already owns a 10s +/// auto-refresh loop; pushing that into the tile component would either +/// duplicate it (one timer per tile) or awkwardly couple back to the page. The +/// parent passes a fresh every refresh and the +/// tile component re-renders. This is the same contract +/// follows. +/// +/// +/// Snapshot shape. Unlike — which takes a +/// dedicated AuditLogKpiSnapshot type — Site Call KPIs travel in the +/// message itself (it carries the KPI fields +/// directly), so that record doubles as the snapshot here. +/// is a separate flag rather than the record's own Success so the parent +/// can also surface a transport failure (an Ask that threw) as unavailable. +/// +/// +/// Threshold borders. Mirrors the Notification Outbox tile pattern: the +/// Parked tile gets a danger border when ParkedCount > 0; the Stuck +/// tile gets a warning border when StuckCount > 0. Buffered is a plain +/// count tile with no threshold colour — a non-zero buffer is normal operation. +/// +/// +public partial class SiteCallKpiTiles +{ + /// + /// Latest KPI snapshot. 
null means the parent has not loaded it yet + /// or the load failed — the tiles render em dashes in that case. + /// + [Parameter] public SiteCallKpiResponse? Snapshot { get; set; } + + /// + /// True when is a successful query result. False when + /// the parent's refresh threw, or the response itself reported a fault, and + /// the displayed values should be rendered as em dashes with an error + /// explanation underneath. + /// + [Parameter] public bool IsAvailable { get; set; } + + /// + /// Optional error message to render underneath the tiles when + /// is false. Mirrors how the Notification Outbox + /// section on the Health dashboard surfaces transient KPI failures. + /// + [Parameter] public string? ErrorMessage { get; set; } + + // ── Buffered tile ─────────────────────────────────────────────────────── + + private string BufferedDisplay => + IsAvailable && Snapshot is not null + ? Snapshot.BufferedCount.ToString("N0") + : "—"; + + private void NavigateToBuffered() + { + // Buffered is "everything still in flight" — no single status maps to + // it, so the natural drill-in is the unfiltered Site Calls report sorted + // by newest, mirroring how the Audit volume/backlog tiles drop the + // operator on the unfiltered Audit Log grid. + Navigation.NavigateTo("/site-calls/report"); + } + + // ── Stuck tile ────────────────────────────────────────────────────────── + + private string StuckDisplay => + IsAvailable && Snapshot is not null + ? Snapshot.StuckCount.ToString("N0") + : "—"; + + // Stuck above zero is a warning signal — cached calls that have been + // Pending/Retrying past the stuck-age threshold. Matches the Notification + // Outbox Stuck tile (border-warning when StuckCount > 0). + private string StuckBorderClass => + IsAvailable && Snapshot is not null && Snapshot.StuckCount > 0 + ? "border-warning" + : string.Empty; + + private string StuckTextClass => + IsAvailable && Snapshot is not null && Snapshot.StuckCount > 0 + ? 
"text-warning" + : string.Empty; + + private void NavigateToStuck() + { + // Drill in with the report's "stuck only" filter pre-applied. + Navigation.NavigateTo("/site-calls/report?stuck=true"); + } + + // ── Parked tile ───────────────────────────────────────────────────────── + + private string ParkedDisplay => + IsAvailable && Snapshot is not null + ? Snapshot.ParkedCount.ToString("N0") + : "—"; + + // Parked above zero is a danger signal — cached calls that exhausted retries + // and need an operator Retry/Discard. Matches the Notification Outbox Parked + // tile (border-danger when ParkedCount > 0). + private string ParkedBorderClass => + IsAvailable && Snapshot is not null && Snapshot.ParkedCount > 0 + ? "border-danger" + : string.Empty; + + private string ParkedTextClass => + IsAvailable && Snapshot is not null && Snapshot.ParkedCount > 0 + ? "text-danger" + : string.Empty; + + private void NavigateToParked() + { + // Drill in pre-filtered to Parked — the report's Status filter accepts + // ?status=Parked and Parked rows carry the Retry/Discard relay actions. + Navigation.NavigateTo("/site-calls/report?status=Parked"); + } +} diff --git a/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor b/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor index 58a87d1..198a161 100644 --- a/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor +++ b/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor @@ -8,6 +8,7 @@ @using ScadaLink.Commons.Interfaces.Repositories @using ScadaLink.HealthMonitoring @using ScadaLink.Commons.Messages.Notification +@using ScadaLink.Commons.Messages.Audit @using ScadaLink.Communication @implements IDisposable @inject ICentralHealthAggregator HealthAggregator @@ -60,6 +61,12 @@
Notification Outbox KPIs unavailable: @_outboxKpiError
} + @* Site Call Audit (#22) Task 7 — three KPI tiles for the Site Call channel + (buffered / stuck / parked). Refreshed alongside the site states. *@ + + @* Audit Log (#23) M7 Bundle E — three KPI tiles for the Audit channel (volume / error rate / backlog). Refreshed alongside the site states. *@ diff --git a/tests/ScadaLink.CentralUI.Tests/Components/Health/SiteCallKpiTilesTests.cs b/tests/ScadaLink.CentralUI.Tests/Components/Health/SiteCallKpiTilesTests.cs new file mode 100644 index 0000000..8dfdd04 --- /dev/null +++ b/tests/ScadaLink.CentralUI.Tests/Components/Health/SiteCallKpiTilesTests.cs @@ -0,0 +1,177 @@ +using Bunit; +using Bunit.TestDoubles; +using Microsoft.AspNetCore.Components; +using Microsoft.Extensions.DependencyInjection; +using ScadaLink.CentralUI.Components.Health; +using ScadaLink.Commons.Messages.Audit; + +namespace ScadaLink.CentralUI.Tests.Components.Health; + +/// +/// bUnit tests for (Site Call Audit #22, Task 7). +/// The component renders three Bootstrap-card tiles — Buffered, Stuck, Parked — +/// from a single snapshot. The tests pin: +/// +/// +/// Three-tile render contract (data-test attributes for stable selectors). +/// Tile values render the snapshot's counters. +/// Threshold borders fire correctly — danger on Parked > 0, warning +/// on Stuck > 0, none when those counts are zero, none on Buffered. +/// Unavailable snapshot renders em dashes plus the error message. +/// Tile clicks navigate to the correct pre-filtered Site Calls report URL. 
+/// +/// +public class SiteCallKpiTilesTests : BunitContext +{ + private static SiteCallKpiResponse MakeSnapshot(int buffered, int parked, int stuck) => + new( + CorrelationId: "k", + Success: true, + ErrorMessage: null, + BufferedCount: buffered, + ParkedCount: parked, + FailedLastInterval: 0, + DeliveredLastInterval: 0, + OldestPendingAge: null, + StuckCount: stuck); + + [Fact] + public void Renders_ThreeTiles_FromSnapshot() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 120, parked: 3, stuck: 7)) + .Add(c => c.IsAvailable, true)); + + // Three stable data-test selectors — the contract for both these tests + // and any future Playwright sweep. + Assert.Contains("data-test=\"site-call-kpi-buffered\"", cut.Markup); + Assert.Contains("data-test=\"site-call-kpi-stuck\"", cut.Markup); + Assert.Contains("data-test=\"site-call-kpi-parked\"", cut.Markup); + + // Tile values render the snapshot's counters. + Assert.Contains(">120<", cut.Markup); // buffered + Assert.Contains(">7<", cut.Markup); // stuck + Assert.Contains(">3<", cut.Markup); // parked + } + + [Fact] + public void UnavailableSnapshot_RendersEmDashes_AndErrorMessage() + { + var cut = Render(p => p + .Add(c => c.Snapshot, (SiteCallKpiResponse?)null) + .Add(c => c.IsAvailable, false) + .Add(c => c.ErrorMessage, "site call repository unavailable")); + + // All three tiles show em dashes — em dash (U+2014) "—" must appear. + Assert.Contains("—", cut.Markup); + // Inline error message renders below. + Assert.Contains("Site Call KPIs unavailable", cut.Markup); + Assert.Contains("site call repository unavailable", cut.Markup); + } + + [Fact] + public void ParkedTile_GetsDangerBorder_WhenParkedAboveZero() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 0, parked: 4, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var tile = cut.Find("[data-test=\"site-call-kpi-parked\"]"); + Assert.Contains("border-danger", tile.GetAttribute("class") ?? 
string.Empty); + } + + [Fact] + public void ParkedTile_NoDangerBorder_WhenParkedZero() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 9, parked: 0, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var tile = cut.Find("[data-test=\"site-call-kpi-parked\"]"); + Assert.DoesNotContain("border-danger", tile.GetAttribute("class") ?? string.Empty); + } + + [Fact] + public void StuckTile_GetsWarningBorder_WhenStuckAboveZero() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 0, parked: 0, stuck: 6)) + .Add(c => c.IsAvailable, true)); + + var tile = cut.Find("[data-test=\"site-call-kpi-stuck\"]"); + Assert.Contains("border-warning", tile.GetAttribute("class") ?? string.Empty); + // Warning, not danger — Stuck is the softer signal. + Assert.DoesNotContain("border-danger", tile.GetAttribute("class") ?? string.Empty); + } + + [Fact] + public void StuckTile_NoWarningBorder_WhenStuckZero() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 9, parked: 0, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var tile = cut.Find("[data-test=\"site-call-kpi-stuck\"]"); + Assert.DoesNotContain("border-warning", tile.GetAttribute("class") ?? string.Empty); + } + + [Fact] + public void BufferedTile_HasNoThresholdBorder_EvenWithHighCount() + { + // A non-zero buffer is normal operation — the Buffered tile is a plain + // count tile and never gets a danger/warning border. + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 5000, parked: 0, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var tile = cut.Find("[data-test=\"site-call-kpi-buffered\"]"); + var cls = tile.GetAttribute("class") ?? 
string.Empty; + Assert.DoesNotContain("border-danger", cls); + Assert.DoesNotContain("border-warning", cls); + } + + [Fact] + public void BufferedTile_Click_NavigatesToUnfilteredSiteCallsReport() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 50, parked: 0, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var nav = (BunitNavigationManager)Services.GetRequiredService(); + var tile = cut.Find("[data-test=\"site-call-kpi-buffered\"]"); + tile.Click(); + + // Unfiltered /site-calls/report — no query string. + Assert.EndsWith("/site-calls/report", nav.Uri); + } + + [Fact] + public void StuckTile_Click_NavigatesToSiteCallsReport_WithStuckFilter() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 0, parked: 0, stuck: 6)) + .Add(c => c.IsAvailable, true)); + + var nav = (BunitNavigationManager)Services.GetRequiredService(); + var tile = cut.Find("[data-test=\"site-call-kpi-stuck\"]"); + tile.Click(); + + // Spec: Stuck tile drills into the report's "stuck only" filter. + Assert.Contains("/site-calls/report?stuck=true", nav.Uri); + } + + [Fact] + public void ParkedTile_Click_NavigatesToSiteCallsReport_WithParkedStatusFilter() + { + var cut = Render(p => p + .Add(c => c.Snapshot, MakeSnapshot(buffered: 0, parked: 4, stuck: 0)) + .Add(c => c.IsAvailable, true)); + + var nav = (BunitNavigationManager)Services.GetRequiredService(); + var tile = cut.Find("[data-test=\"site-call-kpi-parked\"]"); + tile.Click(); + + // Spec: Parked tile drills into ?status=Parked. 
+ Assert.Contains("/site-calls/report?status=Parked", nav.Uri); + } +} diff --git a/tests/ScadaLink.CentralUI.Tests/Pages/HealthPageTests.cs b/tests/ScadaLink.CentralUI.Tests/Pages/HealthPageTests.cs index 78dbd52..b575f8c 100644 --- a/tests/ScadaLink.CentralUI.Tests/Pages/HealthPageTests.cs +++ b/tests/ScadaLink.CentralUI.Tests/Pages/HealthPageTests.cs @@ -9,6 +9,7 @@ using NSubstitute; using ScadaLink.CentralUI.Services; using ScadaLink.Commons.Entities.Sites; using ScadaLink.Commons.Interfaces.Repositories; +using ScadaLink.Commons.Messages.Audit; using ScadaLink.Commons.Messages.Notification; using ScadaLink.Commons.Types; using ScadaLink.Communication; @@ -37,6 +38,13 @@ public class HealthPageTests : BunitContext new("k", true, null, QueueDepth: 12, StuckCount: 4, ParkedCount: 3, DeliveredLastInterval: 88, OldestPendingAge: TimeSpan.FromMinutes(6)); + // Site Call Audit (#22) Task 7 — mutable scripted Site Call KPI reply. Tests + // that target the Site Call tiles override this before rendering. + private SiteCallKpiResponse _siteCallKpiReply = + new("k", true, null, BufferedCount: 9, ParkedCount: 2, FailedLastInterval: 1, + DeliveredLastInterval: 40, OldestPendingAge: TimeSpan.FromMinutes(3), + StuckCount: 5); + public HealthPageTests() { _comms = new CommunicationService( @@ -45,6 +53,9 @@ public class HealthPageTests : BunitContext var outbox = _system.ActorOf(Props.Create(() => new ScriptedOutboxActor(this))); _comms.SetNotificationOutbox(outbox); + + var siteCallAudit = _system.ActorOf(Props.Create(() => new ScriptedSiteCallAuditActor(this))); + _comms.SetSiteCallAudit(siteCallAudit); Services.AddSingleton(_comms); var aggregator = Substitute.For(); @@ -133,6 +144,53 @@ public class HealthPageTests : BunitContext }); } + [Fact] + public void Renders_SiteCallKpiTiles_WithValues() + { + var cut = Render(); + + // KPI data arrives via an async actor Ask after first render. 
+ cut.WaitForAssertion(() => + { + Assert.Contains("Site Calls", cut.Markup); + // The three Site Call tiles render at the documented data-test selectors. + Assert.Contains("data-test=\"site-call-kpi-buffered\"", cut.Markup); + Assert.Contains("data-test=\"site-call-kpi-stuck\"", cut.Markup); + Assert.Contains("data-test=\"site-call-kpi-parked\"", cut.Markup); + // KPI numeric values surface in the tiles. + Assert.Contains(">9<", cut.Markup); // BufferedCount + Assert.Contains(">5<", cut.Markup); // StuckCount + Assert.Contains(">2<", cut.Markup); // ParkedCount + }); + } + + [Fact] + public void RendersLinkToTheSiteCallsReportPage() + { + var cut = Render(); + var link = cut.Find("a[href='/site-calls/report']"); + Assert.Contains("View details", link.TextContent); + } + + [Fact] + public void SiteCallKpiFailure_ShowsGracefulFallback() + { + _siteCallKpiReply = new SiteCallKpiResponse( + "k", false, "site call repository unavailable", 0, 0, 0, 0, null, 0); + + var cut = Render(); + + cut.WaitForAssertion(() => + { + // Failure must not crash the page; tiles fall back to a dash and the + // inline error message surfaces. + Assert.Contains("Site Calls", cut.Markup); + Assert.Contains("Site Call KPIs unavailable", cut.Markup); + Assert.Contains("site call repository unavailable", cut.Markup); + Assert.Contains(">—<", cut.Markup); + }); + } + [Fact] public void OutboxKpiFailure_ShowsGracefulFallback() { @@ -170,4 +228,16 @@ public class HealthPageTests : BunitContext Receive(_ => Sender.Tell(test._kpiReply)); } } + + /// + /// Stand-in for the Site Call Audit actor. Replies to the KPI request with + /// the test's currently-scripted response. 
+ /// + private sealed class ScriptedSiteCallAuditActor : ReceiveActor + { + public ScriptedSiteCallAuditActor(HealthPageTests test) + { + Receive(_ => Sender.Tell(test._siteCallKpiReply)); + } + } } From b3b02a8cb6e9f083ec4d22687d0aadb507294aab Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 05:08:50 -0400 Subject: [PATCH 15/23] fix(centralui): apply status/stuck query-string filters on the Site Calls page --- .../Pages/SiteCalls/SiteCallsReport.razor.cs | 65 +++++++++++++++++ .../Pages/SiteCallsReportPageTests.cs | 73 +++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs index 079c009..c8f0a21 100644 --- a/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs +++ b/src/ScadaLink.CentralUI/Components/Pages/SiteCalls/SiteCallsReport.razor.cs @@ -1,3 +1,5 @@ +using Microsoft.AspNetCore.Components; +using Microsoft.AspNetCore.WebUtilities; using Microsoft.Extensions.Logging; using ScadaLink.CentralUI.Components.Shared; using ScadaLink.Commons.Entities.Sites; @@ -26,11 +28,32 @@ namespace ScadaLink.CentralUI.Components.Pages.SiteCalls; /// a relay that never reaches the site is a transient transport condition, surfaced /// to the operator differently from a generic failure. /// +/// +/// +/// Query-string drill-in: the Health-dashboard Site Call KPI tiles deep-link here +/// with ?status=Parked (Parked tile) or ?stuck=true (Stuck tile). On +/// initialization those params seed / +/// BEFORE the first , so the first grid load is already +/// filtered and the filter card controls reflect the seeded values. Parsing is lax +/// — an absent, blank, or unrecognised value is silently dropped and the page loads +/// unfiltered, mirroring AuditLogPage's drill-in convention. +/// ///
public partial class SiteCallsReport { private const int PageSize = 50; + [Inject] private NavigationManager Navigation { get; set; } = null!; + + // The Status filter binds. An unrecognised value leaves the filter unset. + var match = ValidStatuses.FirstOrDefault( + s => string.Equals(s, v?.Trim(), StringComparison.OrdinalIgnoreCase)); + if (match is not null) + { + _statusFilter = match; + } + } + + if (query.TryGetValue("stuck", out var stuckValues) + && bool.TryParse(stuckValues.ToString(), out var stuck)) + { + _stuckOnly = stuck; + } + } + /// Re-fetch the current page (Refresh button, and after a relay action). private async Task RefreshAll() { diff --git a/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs index e4fa93d..6dd2852 100644 --- a/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs +++ b/tests/ScadaLink.CentralUI.Tests/Pages/SiteCallsReportPageTests.cs @@ -1,7 +1,9 @@ using System.Security.Claims; using Akka.Actor; using Bunit; +using Bunit.TestDoubles; using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Components; using Microsoft.AspNetCore.Components.Authorization; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging.Abstractions; @@ -411,6 +413,77 @@ public class SiteCallsReportPageTests : BunitContext }); } + // ───────────────────────────────────────────────────────────────────────── + // Query-string drill-in — the Health-dashboard Site Call KPI tiles deep-link + // here with ?status=Parked (Parked tile) and ?stuck=true (Stuck tile). The + // params must seed the filter BEFORE the first query so the initial grid load + // is already filtered, and the filter card controls must reflect the values. 
+ // ───────────────────────────────────────────────────────────────────────── + + [Fact] + public void NavigateWithStatusParkedParam_LoadsGridPreFilteredToParked() + { + // The Parked KPI tile emits ?status=Parked — set the URI before render. + var nav = (BunitNavigationManager)Services.GetRequiredService(); + nav.NavigateTo("/site-calls/report?status=Parked"); + + var cut = Render(); + + cut.WaitForAssertion(() => + { + // The first (and only) query the page issues carries the Parked + // status filter — the grid load is pre-filtered, not unfiltered. + Assert.Single(_queryRequests); + Assert.Equal("Parked", _queryRequests[0].StatusFilter); + + // The Status