Compare commits
113 Commits
381eea63b1
...
d08cedc8c4
| Author | SHA1 | Date | |
|---|---|---|---|
| d08cedc8c4 | |||
| ec82027bd2 | |||
| d8d47821e3 | |||
| 38b51ef894 | |||
| d8dfbc79f4 | |||
| c2cd62e72a | |||
| e681a1f0e1 | |||
| 7589ea8da9 | |||
| e471a5a855 | |||
| 77cc4865c5 | |||
| d46cb56934 | |||
| d43d43d795 | |||
| 00ec265980 | |||
| f839f6ce45 | |||
| 354314dfe0 | |||
| 320e4d7479 | |||
| 17ef5f85de | |||
| 5efbb9a985 | |||
| 397498c120 | |||
| e0026c6da7 | |||
| 627c48c458 | |||
| a08ad09514 | |||
| e7ed858920 | |||
| f749a5f977 | |||
| 547a7b98e5 | |||
| 7ecf6448e3 | |||
| 562a1d1678 | |||
| 82745ef916 | |||
| 0be1feb561 | |||
| 8bb860ad5f | |||
| 22bac058dd | |||
| 34e464edab | |||
| 016f5d48a6 | |||
| 36332e5a94 | |||
| 0fa4ac5525 | |||
| 0f90c0ad9c | |||
| 25beb833fc | |||
| c95758c6ce | |||
| 1629a72093 | |||
| adcab9dcfc | |||
| d54c3da291 | |||
| 1bb0e62bb3 | |||
| 67b86aa683 | |||
| 0904401f1e | |||
| 113f00a6fa | |||
| 17861efa51 | |||
| 558f9ceb39 | |||
| a5653b4296 | |||
| c8b5871782 | |||
| 4b61e29e27 | |||
| 5e80f64cd8 | |||
| 213b9c7c0a | |||
| af22aa7ce1 | |||
| 9e7bc7b541 | |||
| 9b05e48ea6 | |||
| ad9872705d | |||
| afdf581e32 | |||
| 1d495d1a87 | |||
| 2ff62a2ceb | |||
| b88c75c116 | |||
| 3326bddeb0 | |||
| 05614e037a | |||
| 6a77c12735 | |||
| 703cb2d392 | |||
| 517437b0d9 | |||
| 41358c1cee | |||
| 77a05a8960 | |||
| 82e3eb0e93 | |||
| ab3721a2e8 | |||
| c41f43c87f | |||
| 4dc9f9e159 | |||
| 435c853dce | |||
| 04e00d56c6 | |||
| b8dece0e70 | |||
| 8d52890245 | |||
| fb589bf1da | |||
| c547f82957 | |||
| 6056ad58b0 | |||
| 5696a8af9f | |||
| 07cd185368 | |||
| 2c59d59b61 | |||
| 3022aa8379 | |||
| 761595309b | |||
| 87ac9b8a4d | |||
| ed7fddb0b5 | |||
| 397a62677f | |||
| 926ca902bd | |||
| f9b942bb94 | |||
| dff9e0aa76 | |||
| f2513a4ef4 | |||
| c8b8cd9150 | |||
| bb35453d58 | |||
| ba166bf503 | |||
| 7585612347 | |||
| b9e0ef4650 | |||
| aac3ee9bb3 | |||
| 097bb3361e | |||
| ef4614d710 | |||
| bb5519f6f2 | |||
| d7a16084b5 | |||
| f2d710a9dc | |||
| 0dd3be0bd8 | |||
| a59f5ec1ff | |||
| 6ccf3766dc | |||
| 0b56c809e1 | |||
| b83d16364a | |||
| 2a7e7289b3 | |||
| 0f96562bfd | |||
| a4ba2dfe01 | |||
| 0388720390 | |||
| f177b58699 | |||
| bbfa0c515e | |||
| d4e86c1b1d | |||
@@ -36,7 +36,7 @@ This project contains design documentation for a distributed SCADA system built
|
||||
- Use `git diff` to review changes before committing.
|
||||
- Commit related changes together with a descriptive message summarizing the design decision.
|
||||
|
||||
## Current Component List (20 components)
|
||||
## Current Component List (22 components)
|
||||
|
||||
1. Template Engine — Template modeling, inheritance, composition, validation, flattening, diffs.
|
||||
2. Deployment Manager — Central-side deployment pipeline, system-wide artifact deployment, instance lifecycle.
|
||||
@@ -45,7 +45,7 @@ This project contains design documentation for a distributed SCADA system built
|
||||
5. Central–Site Communication — Akka.NET ClusterClient (command/control) + gRPC server-streaming (real-time data), message patterns, debug streaming.
|
||||
6. Store-and-Forward Engine — Buffering, fixed-interval retry, parking, SQLite persistence, replication.
|
||||
7. External System Gateway — External system definitions, API method invocation, database connections.
|
||||
8. Notification Service — Notification lists, email delivery, store-and-forward integration.
|
||||
8. Notification Service — Central-only notification-list and SMTP definitions, per-type delivery adapters (sites no longer deliver notifications).
|
||||
9. Central UI — Web-based management interface, all workflows.
|
||||
10. Security & Auth — LDAP/AD authentication, role-based authorization, site-scoped permissions.
|
||||
11. Health Monitoring — Site health metrics collection and central reporting.
|
||||
@@ -58,6 +58,8 @@ This project contains design documentation for a distributed SCADA system built
|
||||
18. Management Service — Akka.NET actor providing programmatic access to all admin operations, ClusterClientReceptionist registration.
|
||||
19. CLI — Command-line tool using HTTP Management API, System.CommandLine, JSON/table output.
|
||||
20. Traefik Proxy — Reverse proxy/load balancer fronting central cluster, active node routing via `/health/active`, automatic failover.
|
||||
21. Notification Outbox — Central component ingesting store-and-forwarded notifications, `Notifications` audit table, dispatcher loop, retry/parking, delivery KPIs.
|
||||
22. Site Call Audit — Central component auditing site cached calls (`CachedCall`/`CachedWrite`); `SiteCalls` audit table, telemetry ingest, reconciliation, KPIs, central→site Retry/Discard relay; sites remain the source of truth.
|
||||
|
||||
## Key Design Decisions (for context across sessions)
|
||||
|
||||
@@ -88,6 +90,9 @@ This project contains design documentation for a distributed SCADA system built
|
||||
- Dual call modes: `ExternalSystem.Call()` (synchronous) and `ExternalSystem.CachedCall()` (store-and-forward on transient failure).
|
||||
- Error classification: HTTP 5xx/408/429/connection errors = transient; other 4xx = permanent (returned to script).
|
||||
- Notification Service: SMTP with OAuth2 Client Credentials (Microsoft 365) or Basic Auth. BCC delivery, plain text.
|
||||
- Notification delivery is central-only: sites store-and-forward notifications to the central cluster (target = central, not SMTP); sites never talk to SMTP. Notification lists and SMTP config are no longer deployed to sites; recipient resolution happens at central, at delivery time.
|
||||
- Notification lists carry a `Type` discriminator (`Email` now; `Teams` and others later). `Notify.To("list")` is type-agnostic; delivery is via per-type `INotificationDeliveryAdapter` (success/transient/permanent classification, same pattern as External System Gateway).
|
||||
- `Notify.Send` is async — returns a `NotificationId` (GUID, idempotency key) status handle immediately. `Notify.Status(notificationId)` returns a status record (status, retry count, last error, key timestamps); answered site-locally as `Forwarding` while still in the site S&F buffer, otherwise round-trips to central.
|
||||
- Inbound API: `POST /api/{methodName}`, `X-API-Key` header, flat JSON, extended type system (Object, List).
|
||||
|
||||
### Templates & Deployment
|
||||
@@ -109,6 +114,18 @@ This project contains design documentation for a distributed SCADA system built
|
||||
- Async best-effort replication to standby (no ack wait).
|
||||
- Messages not cleared on instance deletion.
|
||||
- CachedCall idempotency is the caller's responsibility.
|
||||
- Notification Outbox: central `NotificationOutboxActor` singleton on the active central node — the first centrally-hosted store-and-forward component (the S&F Engine remains site-only).
|
||||
- `Notifications` table in central MS SQL is the single source of audit truth (one row per notification); type-agnostic via the `Type` discriminator.
|
||||
- Status lifecycle `Pending → Retrying → Delivered / Parked / Discarded`, plus site-local `Forwarding` (never persisted centrally).
|
||||
- Dispatcher loop polls due rows, resolves the list, and delivers via the typed adapter; transient failures are retried at the fixed interval and move to `Parked` once the max retry count is exhausted, while permanent failures park immediately.
|
||||
- Site→central handoff is at-least-once: ack-after-persist plus insert-if-not-exists on `NotificationId`.
|
||||
- No Akka replication — MS SQL is the HA store; daily purge of terminal rows after a configurable window (default 365 days).
|
||||
- Notification Outbox retry reuses central SMTP max-retry-count and fixed interval.
|
||||
- Cached calls (`ExternalSystem.CachedCall`, `Database.CachedWrite`) return a `TrackedOperationId` tracking handle, unified with `Notify.Send`'s existing tracking model (`Notify.Status` retained as a thin alias).
|
||||
- A site-local operation tracking table (SQLite, alongside the S&F buffer) is the source of truth for cached-call status; `Tracking.Status(id)` reads it site-locally and authoritatively; terminal rows purged after a configurable window (default 7 days).
|
||||
- Unified tracking status lifecycle `Pending → Retrying → Delivered / Parked / Failed / Discarded`; `Failed` = permanent failure (also returned synchronously to the calling script). No `Forwarding` state for cached calls.
|
||||
- Site Call Audit (#22): central `SiteCallAuditActor` singleton with a `SiteCalls` audit table (central MS SQL) fed by best-effort site telemetry plus periodic reconciliation pulls — an eventually-consistent mirror, NOT a dispatcher; cached-call delivery stays site-local. Ingest is insert-if-not-exists then upsert-on-newer-status.
|
||||
- Central UI Site Calls page + central→site `RetryParkedOperation`/`DiscardParkedOperation` relay for parked cached calls; central never mutates the `SiteCalls` row directly.
|
||||
|
||||
### Security & Auth
|
||||
- Authentication: direct LDAP bind (username/password), no Kerberos/NTLM. LDAPS/StartTLS required.
|
||||
@@ -130,6 +147,10 @@ This project contains design documentation for a distributed SCADA system built
|
||||
- Health reports: 30s interval, 60s offline threshold, monotonic sequence numbers, raw error counts per interval.
|
||||
- Dead letter monitoring as a health metric.
|
||||
- Site Event Logging: 30-day retention, 1GB storage cap, daily purge, paginated queries with keyword search.
|
||||
- Notification Outbox KPIs are central-computed point-in-time from the `Notifications` table (global + per-source-site): queue depth, stuck count, parked count, delivered-last-interval, oldest-pending age.
|
||||
- Stuck = `Pending`/`Retrying` older than a configurable age threshold (default 10 min) — display-only (KPI count + row badge), no escalation/alerting.
|
||||
- Headline KPI tiles surface on the Health dashboard; a new Central UI Notification Outbox page offers a queryable list with Retry/Discard actions on parked notifications.
|
||||
- Site Call Audit KPIs are central-computed point-in-time from the `SiteCalls` table (global + per-site), mirroring the Notification Outbox KPI shape; tiles surface on the Health dashboard alongside a queryable Central UI Site Calls page with Retry/Discard on parked rows.
|
||||
|
||||
### Code Organization
|
||||
- Entity classes are persistence-ignorant POCOs in Commons; EF mappings in Configuration Database.
|
||||
|
||||
@@ -14,7 +14,7 @@ This document serves as the master index for the SCADA system design. The system
|
||||
| Central Database | MS SQL Server, Entity Framework Core |
|
||||
| Site Storage | SQLite (deployed configs, S&F buffer, event logs) |
|
||||
| Authentication | Direct LDAP/AD bind (LDAPS/StartTLS), JWT sessions |
|
||||
| Notifications | SMTP with OAuth2 Client Credentials (Microsoft 365) |
|
||||
| Notifications | Delivered from the central cluster (SMTP, OAuth2/Microsoft 365); store-and-forwarded from sites |
|
||||
| Hosting | Windows Server, Windows Service |
|
||||
| Cluster | Akka.NET Cluster (active/standby, keep-oldest SBR) |
|
||||
| Logging | Serilog (structured) |
|
||||
@@ -38,10 +38,10 @@ This document serves as the master index for the SCADA system design. The system
|
||||
| 2 | Deployment Manager | [docs/requirements/Component-DeploymentManager.md](docs/requirements/Component-DeploymentManager.md) | Central-side deployment pipeline with deployment ID/idempotency, per-instance operation lock, state transition matrix, all-or-nothing site apply, system-wide artifact deployment with per-site status. |
|
||||
| 3 | Site Runtime | [docs/requirements/Component-SiteRuntime.md](docs/requirements/Component-SiteRuntime.md) | Site-side actor hierarchy with explicit supervision strategies, staggered startup, script trust model (constrained APIs), Tell/Ask conventions, concurrency serialization, and site-wide Akka stream with per-subscriber backpressure. |
|
||||
| 4 | Data Connection Layer | [docs/requirements/Component-DataConnectionLayer.md](docs/requirements/Component-DataConnectionLayer.md) | Common data connection interface (OPC UA, custom), Become/Stash connection actor model, auto-reconnect, immediate bad quality on disconnect, transparent re-subscribe, synchronous write failures, tag path resolution retry. |
|
||||
| 5 | Central–Site Communication | [docs/requirements/Component-Communication.md](docs/requirements/Component-Communication.md) | Dual transport: Akka.NET ClusterClient (command/control) + gRPC server-streaming (real-time data). 8 message patterns with per-pattern timeouts, SiteStreamGrpcServer/Client, application-level correlation IDs, transport heartbeat config, gRPC keepalive, message ordering, connection failure behavior. |
|
||||
| 5 | Central–Site Communication | [docs/requirements/Component-Communication.md](docs/requirements/Component-Communication.md) | Dual transport: Akka.NET ClusterClient (command/control) + gRPC server-streaming (real-time data). 9 message patterns with per-pattern timeouts, SiteStreamGrpcServer/Client, application-level correlation IDs, transport heartbeat config, gRPC keepalive, message ordering, connection failure behavior. |
|
||||
| 6 | Store-and-Forward Engine | [docs/requirements/Component-StoreAndForward.md](docs/requirements/Component-StoreAndForward.md) | Buffering (transient failures only), fixed-interval retry, parking, async best-effort replication, SQLite persistence at sites. |
|
||||
| 7 | External System Gateway | [docs/requirements/Component-ExternalSystemGateway.md](docs/requirements/Component-ExternalSystemGateway.md) | HTTP/REST + JSON, API key/Basic Auth, per-system timeout, dual call modes (Call/CachedCall), transient/permanent error classification, dedicated blocking I/O dispatcher, ADO.NET connection pooling. |
|
||||
| 8 | Notification Service | [docs/requirements/Component-NotificationService.md](docs/requirements/Component-NotificationService.md) | SMTP with OAuth2 (M365) or Basic Auth, BCC delivery, plain text, transient/permanent SMTP error classification, store-and-forward integration. |
|
||||
| 8 | Notification Service | [docs/requirements/Component-NotificationService.md](docs/requirements/Component-NotificationService.md) | Central-only — manages typed notification-list and SMTP definitions, supplies per-type delivery adapters (SMTP with OAuth2 (M365) or Basic Auth, BCC, plain text); delivery performed by the Notification Outbox. |
|
||||
| 9 | Central UI | [docs/requirements/Component-CentralUI.md](docs/requirements/Component-CentralUI.md) | Blazor Server with SignalR real-time push, load balancer failover with JWT, all management workflows. |
|
||||
| 10 | Security & Auth | [docs/requirements/Component-Security.md](docs/requirements/Component-Security.md) | Direct LDAP bind (LDAPS/StartTLS), JWT sessions (HMAC-SHA256, 15-min refresh, 30-min idle), role-based authorization, site-scoped permissions. |
|
||||
| 11 | Health Monitoring | [docs/requirements/Component-HealthMonitoring.md](docs/requirements/Component-HealthMonitoring.md) | 30s report interval, 60s offline threshold, monotonic sequence numbers, raw error counts, tag resolution counts, dead letter monitoring. |
|
||||
@@ -54,6 +54,8 @@ This document serves as the master index for the SCADA system design. The system
|
||||
| 18 | Management Service | [docs/requirements/Component-ManagementService.md](docs/requirements/Component-ManagementService.md) | Akka.NET ManagementActor on central, ClusterClientReceptionist registration, programmatic access to all admin operations, CLI interface. |
|
||||
| 19 | CLI | [docs/requirements/Component-CLI.md](docs/requirements/Component-CLI.md) | Standalone command-line tool, System.CommandLine, HTTP transport via Management API, JSON/table output, mirrors all Management Service operations. |
|
||||
| 20 | Traefik Proxy | [docs/requirements/Component-TraefikProxy.md](docs/requirements/Component-TraefikProxy.md) | Reverse proxy/load balancer fronting central cluster, active node routing via `/health/active`, automatic failover. |
|
||||
| 21 | Notification Outbox | [docs/requirements/Component-NotificationOutbox.md](docs/requirements/Component-NotificationOutbox.md) | Central component ingesting store-and-forwarded notifications into the `Notifications` audit table, with `NotificationOutboxActor` singleton dispatcher, per-type delivery adapters, retry/parking, status tracking, daily purge, and delivery KPIs. |
|
||||
| 22 | Site Call Audit | [docs/requirements/Component-SiteCallAudit.md](docs/requirements/Component-SiteCallAudit.md) | Central component auditing site cached calls (`ExternalSystem.CachedCall`/`Database.CachedWrite`) into the `SiteCalls` audit table, with `SiteCallAuditActor` singleton, telemetry ingest, periodic reconciliation, point-in-time KPIs, daily purge, and central→site Retry/Discard relay for parked calls. |
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
<Project Path="src/ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
||||
<Project Path="src/ScadaLink.ExternalSystemGateway/ScadaLink.ExternalSystemGateway.csproj" />
|
||||
<Project Path="src/ScadaLink.NotificationService/ScadaLink.NotificationService.csproj" />
|
||||
<Project Path="src/ScadaLink.NotificationOutbox/ScadaLink.NotificationOutbox.csproj" />
|
||||
<Project Path="src/ScadaLink.CentralUI/ScadaLink.CentralUI.csproj" />
|
||||
<Project Path="src/ScadaLink.Security/ScadaLink.Security.csproj" />
|
||||
<Project Path="src/ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||
@@ -31,6 +32,7 @@
|
||||
<Project Path="tests/ScadaLink.StoreAndForward.Tests/ScadaLink.StoreAndForward.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.ExternalSystemGateway.Tests/ScadaLink.ExternalSystemGateway.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.NotificationService.Tests/ScadaLink.NotificationService.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.NotificationOutbox.Tests/ScadaLink.NotificationOutbox.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.CentralUI.Tests/ScadaLink.CentralUI.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.Security.Tests/ScadaLink.Security.Tests.csproj" />
|
||||
<Project Path="tests/ScadaLink.HealthMonitoring.Tests/ScadaLink.HealthMonitoring.Tests.csproj" />
|
||||
|
||||
@@ -0,0 +1,380 @@
|
||||
# Notification Outbox Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Update the ScadaLink design documentation to introduce a central Notification Outbox — a new component #21 that receives store-and-forwarded notifications from sites, logs them to a type-agnostic `Notifications` table, and delivers them with retry, parking, status handles, and KPIs.
|
||||
|
||||
**Architecture:** This is a documentation-only change. The source of truth for the design is `docs/plans/notif.md` (the approved, refined design). Each task updates one or more existing component documents (or creates the new one) so the documentation set is internally consistent. There is no code and no test suite — verification for each task is a cross-reference consistency check (grep for stale references) plus a read-through against `notif.md`.
|
||||
|
||||
**Tech Stack:** Markdown design documents under `docs/requirements/`, the master `README.md` index, and `CLAUDE.md`. Conventions are defined in `CLAUDE.md` (Document Conventions, Editing Rules).
|
||||
|
||||
**Source of truth:** `docs/plans/notif.md` — every task below implements a slice of it. Read it first.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Create Component-NotificationOutbox.md
|
||||
|
||||
**Files:**
|
||||
- Create: `docs/requirements/Component-NotificationOutbox.md`
|
||||
- Reference: `docs/plans/notif.md` (full design)
|
||||
- Reference: `docs/requirements/Component-StoreAndForward.md` (structure template — closest analogue)
|
||||
|
||||
**Step 1: Read the source and a template**
|
||||
|
||||
Read `docs/plans/notif.md` in full, and `docs/requirements/Component-StoreAndForward.md` for the standard component-document structure (Purpose, Location, Responsibilities, design sections, Dependencies, Interactions).
|
||||
|
||||
**Step 2: Write the component document**
|
||||
|
||||
Create `Component-NotificationOutbox.md` following the standard structure. Required sections:
|
||||
|
||||
- **Purpose** — central component that receives store-and-forwarded notifications from sites, logs every one to the `Notifications` table (single audit source), and delivers them via per-type adapters with retry, parking, status tracking, and KPIs.
|
||||
- **Location** — Central cluster. `NotificationOutboxActor` is a singleton on the active central node.
|
||||
- **Responsibilities** — owns the durable central queue (`Notifications` table), the dispatcher loop, retry scheduling, parking, per-notification status tracking, KPI computation. Delivery work runs on a dedicated blocking-I/O dispatcher.
|
||||
- **End-to-end flow** — reproduce the flow diagram from `notif.md` (script → site S&F → central ingest → outbox dispatch → adapter).
|
||||
- **The `Notifications` table** — the full field table from `notif.md`, the status lifecycle (`Pending` → `Retrying` → `Delivered` / `Parked` / `Discarded`, preceded by the site-local `Forwarding` status, which is never persisted centrally), retry policy (reuses the central SMTP config's max-retry count and fixed interval), and retention (daily purge of terminal rows after a configurable window, default 365 days).
|
||||
- **Ingest & idempotency** — insert-if-not-exists on `NotificationId`; ack-after-persist; at-least-once site→central delivery.
|
||||
- **Dispatcher** — polls due rows, resolves the list, delivers via the matching adapter.
|
||||
- **Delivery adapters** — `INotificationDeliveryAdapter` per `Type` returning `success | transient | permanent`; Email adapter implemented now, Teams/others future.
|
||||
- **Active/standby** — singleton on active central node; state in MS SQL; no Akka replication; resumes from the table on failover.
|
||||
- **Monitoring** — KPIs (queue depth, stuck count, parked count, delivered-last-interval, oldest-pending age); stuck = `Pending`/`Retrying` older than 10 min (configurable), display-only.
|
||||
- **Configuration** — `NotificationOutboxOptions`: dispatch interval, stuck-age threshold, terminal-row retention window.
|
||||
- **Dependencies** — Notification Service (definitions + delivery adapters), Configuration Database (`Notifications` table), Central–Site Communication (notification submission), Health Monitoring (KPIs), Central UI (Outbox page).
|
||||
- **Interactions** — Site S&F Engine (inbound submissions), Notification Service, Central UI, Health Monitoring.
|
||||
|
||||
Match the prose density and heading style of the neighbouring component docs. Edit in place; no backup files (per `CLAUDE.md` Editing Rules).
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n "Notification Outbox\|NotificationOutboxActor\|Notifications table" docs/requirements/Component-NotificationOutbox.md`
|
||||
Expected: the new terms appear; the document reads as a peer of the other component docs.
|
||||
|
||||
Run: `grep -rn "Forwarding" docs/requirements/Component-NotificationOutbox.md`
|
||||
Expected: the `Forwarding` status is documented as site-local, never stored centrally.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-NotificationOutbox.md
|
||||
git commit -m "docs(notification-outbox): add Component-NotificationOutbox design doc"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Revise Component-NotificationService.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-NotificationService.md`
|
||||
- Reference: `docs/plans/notif.md`
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-NotificationService.md` in full. It currently describes site-side SMTP delivery.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- **Purpose / Location** — the service is now central-only: manages notification-list and SMTP definitions, and provides the delivery adapters. It no longer delivers from sites.
|
||||
- **Definitions** — notification lists gain a **`Type`** field (`Email` now; `Teams`/others later) plus type-specific targets. Definitions and SMTP config are **no longer deployed to sites** (remove the deploy-to-sites / local-SQLite responsibilities).
|
||||
- **Delivery** — replace the site-side delivery section. Delivery is performed centrally by the Notification Outbox calling an `INotificationDeliveryAdapter` per type. The Email adapter is the relocated SMTP composition/send logic. Recipient resolution happens at central at delivery time.
|
||||
- **Script API** — `Notify.To("list").Send(subject, body)` is now **async with a status handle**: it returns a `NotificationId` immediately. Add `Notify.Status(notificationId)` which returns a status record (status, retry count, last error, key timestamps). Note the site-local `Forwarding` status.
|
||||
- **Error classification** — there is no synchronous permanent-failure return to the script anymore; permanent failures result in a `Parked` row. Reframe accordingly.
|
||||
- **Dependencies / Interactions** — depends on Notification Outbox (not the site S&F Engine directly); remove the local-SQLite dependency.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n "site\|SQLite\|deploy" docs/requirements/Component-NotificationService.md`
|
||||
Expected: no remaining claims that sites send email or that lists/SMTP config deploy to site SQLite.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-NotificationService.md
|
||||
git commit -m "docs(notification-outbox): central-only Notification Service, typed lists, async API"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Revise Component-StoreAndForward.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-StoreAndForward.md`
|
||||
- Reference: `docs/plans/notif.md`
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-StoreAndForward.md`. It lists three categories including notifications, with SMTP as the notification delivery target.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- The notification category is **retained**, but its delivery *target* changes from SMTP to **the central cluster**. "Delivering" a buffered notification now means handing it to the Communication Layer for central and clearing it on central's ack.
|
||||
- The site→central forward uses a fixed retry interval configured in the host `appsettings.json`. Because this interval governs reaching central rather than delivering to any particular notification list, it is distinct from the per-entity retry settings used by the other two categories.
|
||||
- Update the Message Lifecycle, Retry Policy, Message Format, Dependencies, and Interactions sections so the notification category is described in terms of central delivery, not SMTP.
|
||||
- External-system-call and cached-database-write categories are unchanged.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "notif" docs/requirements/Component-StoreAndForward.md`
|
||||
Expected: notification references describe central as the target; no SMTP claims remain.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-StoreAndForward.md
|
||||
git commit -m "docs(notification-outbox): retarget S&F notification category to central"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Revise Component-HealthMonitoring.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-HealthMonitoring.md`
|
||||
- Reference: `docs/plans/notif.md` (Monitoring section)
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-HealthMonitoring.md`. The Monitored Metrics table includes "Store-and-forward buffer depth" with a notification sub-category.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Add **Notification Outbox KPIs** as **central-computed headline metrics** (queue depth, stuck count, parked count) — distinct from the site-reported metrics, since the outbox is central-side and not part of the site health report.
|
||||
- Clarify that the S&F buffer-depth notification metric now means "notifications awaiting forward to central" (the site→central leg), still site-reported.
|
||||
- Note that outbox KPIs are point-in-time, computed on demand from the `Notifications` table (no time-series store), consistent with the existing "current status only" philosophy.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "notif\|outbox" docs/requirements/Component-HealthMonitoring.md`
|
||||
Expected: outbox KPIs present as central-computed; S&F notification metric reworded to the site→central leg.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-HealthMonitoring.md
|
||||
git commit -m "docs(notification-outbox): add central-computed outbox KPIs to Health Monitoring"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Revise Component-SiteEventLogging.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-SiteEventLogging.md`
|
||||
- Reference: `docs/plans/notif.md` (Refinement decisions — site-side diagnostics)
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-SiteEventLogging.md`. The "Events Logged" table has categories for Script, Alarm, Deployment, Connection, S&F, Instance Lifecycle.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Add a **Notification** category to the Events Logged table. It records **forward failures and long-buffered notifications only** — not routine enqueue/forward success events (central holds the authoritative record; site logging covers the in-transit blind spot).
|
||||
- Add `notification` to the Event Type enumeration in the Event Entry Schema.
|
||||
- Update Dependencies/Interactions to mention the site S&F notification path as a source.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "notif" docs/requirements/Component-SiteEventLogging.md`
|
||||
Expected: the Notification category appears and is scoped to forward failures / long-buffered only.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-SiteEventLogging.md
|
||||
git commit -m "docs(notification-outbox): add Notification category to Site Event Logging"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Revise Component-Communication.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-Communication.md`
|
||||
- Reference: `docs/plans/notif.md` (end-to-end flow, ingest & idempotency)
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-Communication.md`. It has a numbered "Communication Patterns" section (1–8, with 6a) and a "Message Timeouts" table.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Add a new communication pattern — **Notification Submission (Site → Central)** — pattern 9. Pattern: fire-and-forget with acknowledgment (like pattern 5). The site S&F engine sends a `NotificationSubmit` message to central; central acks after persisting the row to the `Notifications` table. The `NotificationId` GUID is the idempotency key.
|
||||
- Update the Purpose sentence's list of what the transport carries to include notification submission.
|
||||
- Add a row to the Message Timeouts table for the new pattern (if it uses ask-with-ack).
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "notification" docs/requirements/Component-Communication.md`
|
||||
Expected: the new pattern and the `NotificationSubmit` message appear.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-Communication.md
|
||||
git commit -m "docs(notification-outbox): add Notification Submission communication pattern"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Revise Component-CentralUI.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-CentralUI.md`
|
||||
- Reference: `docs/plans/notif.md` (Monitoring — Surfacing)
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `docs/requirements/Component-CentralUI.md`. The "Workflows / Pages" section already has a "Notification List Management" page and a "Parked Message Management" page; the Health Monitoring Dashboard is also listed.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Add a new **Notification Outbox** page section (Deployment Role) under Workflows / Pages, near Health Monitoring Dashboard / Parked Message Management. Contents: KPI tiles (queue depth, stuck, parked, delivered-last-interval, oldest-pending age); a queryable notification list filterable by status, type, source site, list, and time range, with a stuck-only toggle and keyword search on subject; Retry and Discard actions on parked notifications; badged stuck rows.
|
||||
- Note the typed-list change in the Notification List Management page (lists now have a `Type`).
|
||||
- Note the Health Monitoring Dashboard now shows headline outbox KPI tiles.
|
||||
- Follow the established UI conventions (Blazor Server + Bootstrap, custom components, clean corporate design — per `CLAUDE.md`).
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "outbox\|notification" docs/requirements/Component-CentralUI.md`
|
||||
Expected: the Notification Outbox page is present and consistent with the design.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-CentralUI.md
|
||||
git commit -m "docs(notification-outbox): add Notification Outbox page to Central UI"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 8: Revise Component-ConfigurationDatabase.md and Component-Commons.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-ConfigurationDatabase.md`
|
||||
- Modify: `docs/requirements/Component-Commons.md`
|
||||
- Reference: `docs/plans/notif.md` (Notifications table)
|
||||
|
||||
**Step 1: Read the current documents**
|
||||
|
||||
Read both documents. Configuration Database describes EF Core repositories, migrations, audit logging. Commons describes POCO entities, repository interfaces, and message contracts.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
In `Component-ConfigurationDatabase.md`:
|
||||
- Add the **`Notifications` table** to the data access layer — a new EF Core entity, repository, and migration. Note the type-agnostic schema (`Type` discriminator + `TypeData` JSON), the status enum, and the daily purge job for terminal rows after the configurable retention window.
|
||||
|
||||
In `Component-Commons.md`:
|
||||
- Add the **`Notification` entity POCO** (persistence-ignorant, in `Entities/`) and its **repository interface** (in `Interfaces/`).
|
||||
- Add the **`NotificationSubmit`** message contract (and its ack) under `Messages/`, following the additive-only versioning rule.
|
||||
- Add the typed notification-list fields if notification-list entities live in Commons.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n -i "notification" docs/requirements/Component-ConfigurationDatabase.md docs/requirements/Component-Commons.md`
|
||||
Expected: the `Notifications` table, `Notification` entity, repository interface, and message contracts are documented in their respective layers.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-ConfigurationDatabase.md docs/requirements/Component-Commons.md
|
||||
git commit -m "docs(notification-outbox): add Notifications table, entity, and message contracts"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Update README.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `README.md`
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `README.md`. It has a Technology Stack table, a Component Design Documents table (20 rows), and architecture diagrams.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Add **row 21 — Notification Outbox** to the Component Design Documents table, linking `docs/requirements/Component-NotificationOutbox.md`, with a one-line description.
|
||||
- Update the Notification Service row (#8) description if it no longer reflects central-only delivery / typed lists.
|
||||
- Update the Technology Stack "Notifications" row to reflect central delivery.
|
||||
- If any architecture diagram shows the notification path, update it; otherwise leave diagrams unchanged.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -c "Component-" README.md` and confirm the component table has 21 component rows.
|
||||
Run: `grep -n "Notification Outbox" README.md`
|
||||
Expected: the new component is row 21 and linked correctly.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add README.md
|
||||
git commit -m "docs(notification-outbox): add Notification Outbox to README component index"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Update CLAUDE.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `CLAUDE.md`
|
||||
|
||||
**Step 1: Read the current document**
|
||||
|
||||
Read `CLAUDE.md`. It has "Current Component List (20 components)" and a "Key Design Decisions" section.
|
||||
|
||||
**Step 2: Apply the revisions**
|
||||
|
||||
- Change the heading to "Current Component List (21 components)" and add **21. Notification Outbox** with a one-line description.
|
||||
- Update the Notification Service entry (#8) to reflect central-only definitions + delivery adapters.
|
||||
- Under "Key Design Decisions → External Integrations" (or a suitable subsection), add entries capturing: notifications store-and-forwarded site→central; central `Notifications` table as single audit source; type-agnostic table (email now, Teams later); async `Notify.Send` returning a status handle; central Notification Outbox singleton; site-local `Forwarding` status.
|
||||
|
||||
**Step 3: Verify consistency**
|
||||
|
||||
Run: `grep -n "21\|Notification Outbox" CLAUDE.md`
|
||||
Expected: component count is 21 and the new component + design decisions are listed.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add CLAUDE.md
|
||||
git commit -m "docs(notification-outbox): update component list and design decisions in CLAUDE.md"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 11: Cross-reference consistency sweep
|
||||
|
||||
**Files:**
|
||||
- Verify: all of `docs/requirements/`, `README.md`, `CLAUDE.md`
|
||||
|
||||
**Step 1: Check for stale notification-delivery claims**
|
||||
|
||||
Run: `grep -rn -i "site.*send.*email\|email.*via SMTP\|SMTP.*at sites\|notification list.*deploy" docs/ README.md CLAUDE.md`
|
||||
Expected: no results claiming sites send email directly or that notification lists deploy to sites.
|
||||
|
||||
**Step 2: Check cross-references resolve**
|
||||
|
||||
Run: `grep -rn "Notification Outbox\|NotificationOutbox" docs/requirements/ README.md CLAUDE.md`
|
||||
Expected: every reference points to a real component / document; no dangling references.
|
||||
|
||||
**Step 3: Check component count agreement**
|
||||
|
||||
Confirm `README.md` component table and `CLAUDE.md` component list both say 21 components and list Notification Outbox identically.
|
||||
|
||||
**Step 4: Review the full diff**
|
||||
|
||||
Run: `git diff main --stat` (or review the branch's commits)
|
||||
Read through the changes and confirm the documentation set is internally consistent with `docs/plans/notif.md`.
|
||||
|
||||
**Step 5: Commit any fixes**
|
||||
|
||||
```bash
|
||||
git add -A
|
||||
git commit -m "docs(notification-outbox): fix stale cross-references after outbox addition"
|
||||
```
|
||||
|
||||
(Skip the commit if the sweep found nothing to fix.)
|
||||
|
||||
---
|
||||
|
||||
## Done
|
||||
|
||||
All component documents, the README index, and CLAUDE.md reflect the central Notification Outbox design. The documentation set is internally consistent with `docs/plans/notif.md`.
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-18-notification-outbox.md",
|
||||
"tasks": [
|
||||
{"id": 7, "subject": "Task 1: Create Component-NotificationOutbox.md", "status": "pending"},
|
||||
{"id": 8, "subject": "Task 2: Revise Component-NotificationService.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 9, "subject": "Task 3: Revise Component-StoreAndForward.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 10, "subject": "Task 4: Revise Component-HealthMonitoring.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 11, "subject": "Task 5: Revise Component-SiteEventLogging.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 12, "subject": "Task 6: Revise Component-Communication.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 13, "subject": "Task 7: Revise Component-CentralUI.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 14, "subject": "Task 8: Revise Component-ConfigurationDatabase.md and Component-Commons.md", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 15, "subject": "Task 9: Update README.md", "status": "pending", "blockedBy": [7, 8, 9, 10, 11, 12, 13, 14]},
|
||||
{"id": 16, "subject": "Task 10: Update CLAUDE.md", "status": "pending", "blockedBy": [7, 8, 9, 10, 11, 12, 13, 14]},
|
||||
{"id": 17, "subject": "Task 11: Cross-reference consistency sweep", "status": "pending", "blockedBy": [7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}
|
||||
],
|
||||
"lastUpdated": "2026-05-18"
|
||||
}
|
||||
@@ -0,0 +1,217 @@
|
||||
# Cached Call Tracking — Design
|
||||
|
||||
**Date**: 2026-05-19
|
||||
**Status**: Approved
|
||||
**Topic**: Trackable IDs for cached external system calls and cached database writes
|
||||
|
||||
## Problem
|
||||
|
||||
`ExternalSystem.CachedCall()` and `Database.CachedWrite()` are fire-and-forget: a
|
||||
script gets no handle back, cannot confirm delivery, and an operator cannot tie a
|
||||
parked S&F message to a known business operation. `Notify.Send()` already returns a
|
||||
trackable `NotificationId`. The goal is to give cached external/database calls the
|
||||
same first-class traceability, under a tracking model unified across all three
|
||||
store-and-forward producers.
|
||||
|
||||
## Decision
|
||||
|
||||
Add a trackable ID to cached calls via **Approach B — a sibling central component
|
||||
(`Site Call Audit`) plus shared tracking contracts in Commons**. The Notification
|
||||
Outbox is left unchanged; unification lives in shared types and a consistent script
|
||||
API, not in a merged table or component.
|
||||
|
||||
### Why a sibling, not a merged component
|
||||
|
||||
Delivery locality is the decisive constraint:
|
||||
|
||||
- **Notifications** are *central-delivered*: sites store-and-forward them to the
|
||||
central cluster, which delivers via SMTP. The `NotificationOutboxActor` runs a
|
||||
dispatcher loop. Central becomes the source of truth after handoff.
|
||||
- **Cached calls / DB writes** are *site-delivered*: the external system or database
|
||||
often sits on the site's own network and is unreachable from central. The site's
|
||||
S&F Engine must always own delivery, and the **site remains the source of truth**
|
||||
for status. Central audit is an eventually-consistent mirror.
|
||||
|
||||
Merging both into one component (Approach A) would put a dispatcher loop that is live
|
||||
for some rows and dormant for others into a single component, hiding a real
|
||||
architectural difference. Approach B expresses the difference honestly while still
|
||||
giving scripts a unified ID model and `Status()` API.
|
||||
|
||||
## Unified tracking model
|
||||
|
||||
### `TrackedOperationId`
|
||||
|
||||
A GUID, defined in Commons, generated **caller-side at the site at call time**. It is
|
||||
both the tracking handle returned to the script and the idempotency key for telemetry
|
||||
sent to central. `Notify.Send()`'s existing `NotificationId` is the notification-domain
|
||||
name for this same type — no behavior change for notifications.
|
||||
|
||||
### Script API
|
||||
|
||||
| Call | Returns |
|
||||
|---|---|
|
||||
| `ExternalSystem.CachedCall(system, method, params)` | `TrackedOperationId` |
|
||||
| `Database.CachedWrite(name, sql, params)` | `TrackedOperationId` |
|
||||
| `Notify.Send(...)` | `TrackedOperationId` (unchanged) |
|
||||
| `Tracking.Status(id)` | unified status record (status, retry count, last error, key timestamps) |
|
||||
|
||||
`Tracking.Status(id)` is the unified accessor. `Notify.Status(id)` is retained as a
|
||||
thin alias for backward compatibility.
|
||||
|
||||
### Status lifecycle
|
||||
|
||||
`Pending → Retrying → Delivered / Parked / Failed / Discarded`
|
||||
|
||||
- **Delivered** — succeeded. A cached call that succeeds on its first immediate
|
||||
attempt goes straight here and never enters the S&F buffer.
|
||||
- **Parked** — transient retries exhausted; awaiting manual action.
|
||||
- **Failed** — permanent failure (e.g. HTTP 4xx). The error is *also* returned
|
||||
synchronously to the calling script, exactly as today; the record captures it.
|
||||
This is the one state beyond the notification lifecycle.
|
||||
- **Discarded** — operator discarded a parked operation.
|
||||
|
||||
There is no `Forwarding` state for cached calls — that exists only because
|
||||
notifications hand off to central. For cached calls, `Tracking.Status(id)` is always
|
||||
answered site-locally and authoritatively.
|
||||
|
||||
## Site-side architecture
|
||||
|
||||
### Site-local operation tracking table
|
||||
|
||||
A new SQLite table alongside the existing S&F buffer DB. One row per
|
||||
`TrackedOperationId`, created the moment the script issues the cached call,
|
||||
regardless of outcome:
|
||||
|
||||
- Fields: kind, target summary (system+method, or DB name), status, retry count,
|
||||
last error, created/updated/terminal timestamps, source provenance
|
||||
(instance/script).
|
||||
- This table is the **status record**. The S&F buffer remains purely the **retry
|
||||
mechanism**; a buffered message references its `TrackedOperationId`.
|
||||
- Immediate success writes a terminal `Delivered` row directly here, with nothing
|
||||
placed in the S&F buffer.
|
||||
- `Tracking.Status(id)` reads this table — local, authoritative, available even when
|
||||
central is unreachable.
|
||||
- Retention: terminal rows purged after a configurable window (default 7 days; the
|
||||
site holds live operational state, central holds long-term audit).
|
||||
|
||||
### Telemetry to central
|
||||
|
||||
On every lifecycle transition (`Pending → Retrying → Delivered/Parked/Failed/
|
||||
Discarded`) the site emits a telemetry event over the existing site→central channel:
|
||||
`TrackedOperationId`, kind, summary, status, retry count, last error, timestamps,
|
||||
source site. Best-effort, at-least-once, idempotent on the ID.
|
||||
|
||||
### Reconciliation
|
||||
|
||||
Because telemetry is best-effort, the central side periodically (and on reconnect)
|
||||
pulls "all tracking rows changed since cursor X" per site. Missed telemetry
|
||||
self-heals. The site never depends on central; central converges to the site.
|
||||
|
||||
### Carried-over rules (unchanged)
|
||||
|
||||
- Tracking rows, like buffered messages, are not cleared on instance deletion.
|
||||
- Cached-call idempotency remains the caller's responsibility — a retry can still
|
||||
double-deliver.
|
||||
|
||||
## Central — Site Call Audit component (new component #22)
|
||||
|
||||
### `SiteCalls` table (central MS SQL)
|
||||
|
||||
Sibling of the `Notifications` table. One row per `TrackedOperationId`: source site,
|
||||
kind, target summary, status, retry count, last error, created/updated/terminal
|
||||
timestamps. Fed only by site telemetry and reconciliation pulls.
|
||||
|
||||
Ingestion is **insert-if-not-exists**, then **upsert-on-newer-status**. The lifecycle
|
||||
is monotonic, so status only advances, never regresses — making at-least-once and
|
||||
out-of-order telemetry harmless. Daily purge of terminal rows after a configurable
|
||||
window (default 365 days, mirroring `Notifications`).
|
||||
|
||||
### `SiteCallAuditActor`
|
||||
|
||||
Singleton on the active central node. Ingests telemetry, runs the periodic
|
||||
reconciliation pulls, computes KPIs, and relays Retry/Discard commands to sites.
|
||||
|
||||
It is **not a dispatcher** — the crucial difference from `NotificationOutboxActor`.
|
||||
Central has no path to a site's external systems or databases; this component is an
|
||||
audit sink, a query surface, and a command relay only.
|
||||
|
||||
### KPIs
|
||||
|
||||
Point-in-time from the `SiteCalls` table, global and per-site, mirroring the
|
||||
Notification Outbox KPI shape: buffered count (`Pending`+`Retrying`), parked count,
|
||||
failed-last-interval, delivered-last-interval, oldest-pending age, and stuck count
|
||||
(`Pending`/`Retrying` older than a configurable threshold, default 10 minutes —
|
||||
display-only, no alerting).
|
||||
|
||||
## Central→site command path (Retry / Discard)
|
||||
|
||||
Parked operations live in the site's S&F buffer, so Retry/Discard from the Central UI
|
||||
must travel down to the owning site:
|
||||
|
||||
- New ClusterClient command/control messages, central→site:
|
||||
`RetryParkedOperation(TrackedOperationId)` and
|
||||
`DiscardParkedOperation(TrackedOperationId)`, riding the existing per-site
|
||||
ClusterClient.
|
||||
- The site applies the command to its S&F buffer / tracking table, then emits normal
|
||||
telemetry reflecting the new state (`Retrying`, or `Discarded`).
|
||||
- Central never directly mutates the `SiteCalls` row. It sends the command and lets
|
||||
the resulting telemetry update the audit row — the site stays the single source of
|
||||
truth.
|
||||
- If the site is offline, the command fails fast and the UI surfaces a
|
||||
"site unreachable" message.
|
||||
|
||||
## Central UI
|
||||
|
||||
New page — **Site Calls** — in the same nav group as the Notification Outbox page:
|
||||
|
||||
- Covers cached calls only: `ExternalCall` + `DatabaseWrite`. Notifications keep their
|
||||
existing dedicated Notification Outbox page.
|
||||
- Queryable list filtered by site, kind, status, and time range. Columns: timestamp,
|
||||
site, kind, target summary, status badge, retry count, last error.
|
||||
- Retry / Discard actions on `Parked` rows, issuing the central→site commands above.
|
||||
- Headline KPI tiles on the Health dashboard alongside the existing Notification
|
||||
Outbox tiles. Stuck rows get a display-only badge — no escalation.
|
||||
- Custom Blazor Server + Bootstrap components, consistent with the rest of the
|
||||
Central UI.
|
||||
|
||||
## Error handling & edge cases
|
||||
|
||||
- **Telemetry loss** — reconciliation pull self-heals; central is explicitly
|
||||
eventually-consistent.
|
||||
- **Out-of-order / duplicate telemetry** — monotonic-status upsert keyed on
|
||||
`TrackedOperationId` makes both harmless.
|
||||
- **Permanent failure on a cached call** — error returned synchronously to the script
|
||||
(unchanged) and recorded as terminal `Failed`.
|
||||
- **Site offline during Retry/Discard** — command fails fast; UI says so; the audit
|
||||
row is unchanged until confirming telemetry arrives.
|
||||
- **Cached-call double-delivery** — still the caller's responsibility; the idempotency
|
||||
note stays in the ESG doc.
|
||||
- **Instance deletion** — tracking rows and buffered messages survive, per the
|
||||
existing S&F rule.
|
||||
|
||||
## Affected documents
|
||||
|
||||
- **New**: `docs/requirements/Component-SiteCallAudit.md`
|
||||
- `Component-ExternalSystemGateway.md` — `CachedCall`/`CachedWrite` return
|
||||
`TrackedOperationId`; `Failed` state; `Tracking.Status`.
|
||||
- `Component-StoreAndForward.md` — site-local tracking table, telemetry emission,
|
||||
reconciliation, `TrackedOperationId` on buffer entries.
|
||||
- `Component-SiteRuntime.md` — Script Runtime API: return types and
|
||||
`Tracking.Status(id)`.
|
||||
- `Component-Communication.md` — telemetry channel and
|
||||
`RetryParkedOperation`/`DiscardParkedOperation` commands.
|
||||
- `Component-Commons.md` — `TrackedOperationId`, unified status enum, telemetry
|
||||
message contracts.
|
||||
- `Component-ConfigurationDatabase.md` — `SiteCalls` table, EF mapping, migration.
|
||||
- `Component-CentralUI.md` — new Site Calls page.
|
||||
- `Component-HealthMonitoring.md` — KPI tiles on the dashboard.
|
||||
- `Component-NotificationService.md` / `Component-NotificationOutbox.md` — note the
|
||||
shared `TrackedOperationId` model and `Notify.Status` alias.
|
||||
- `README.md` — component table updated to 22 components.
|
||||
- `CLAUDE.md` — component list and Key Design Decisions.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- A CLI surface for site-local Retry/Discard (can be added later if needed).
|
||||
- Merging notifications into the Site Calls page or a unified outbox component.
|
||||
- Routing cached-call delivery through central.
|
||||
@@ -0,0 +1,566 @@
|
||||
# Cached Call Tracking Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Give cached external system calls and cached database writes a trackable `TrackedOperationId`, backed by a site-local tracking table and a new central `Site Call Audit` component, under a tracking model unified with `Notify.Send`.
|
||||
|
||||
**Architecture:** Approach B from the design doc — a sibling central component (`Site Call Audit`), not a merged outbox. The site stays the source of truth for cached-call status; central audit is an eventually-consistent mirror fed by best-effort telemetry plus a reconciliation pull. Delivery of cached calls remains site-local.
|
||||
|
||||
**Tech Stack:** This is a design-documentation change. "Implementation" means editing Markdown design documents under `docs/requirements/`, plus `README.md` and `CLAUDE.md`. No source code is touched. The authoritative design is `docs/plans/2026-05-19-cached-call-tracking-design.md` — read it before starting.
|
||||
|
||||
**Working conventions (from `CLAUDE.md`):**
|
||||
- Edit documents in place; no copies or backups.
|
||||
- Component docs follow: Purpose, Location, Responsibilities, design sections, Dependencies, Interactions.
|
||||
- Keep cross-references accurate across all docs.
|
||||
- Use `git diff` to review before committing.
|
||||
|
||||
**Per-task workflow (replaces TDD for this docs project):**
|
||||
1. Read the target file in full first.
|
||||
2. Make the edits described.
|
||||
3. **Verify**: run `git diff <file>` and confirm the change reads correctly and matches the design doc.
|
||||
4. **Cross-reference check**: run the grep given in the task; confirm no stale references.
|
||||
5. **Commit** with the given message.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Create the Site Call Audit component document
|
||||
|
||||
**Files:**
|
||||
- Create: `docs/requirements/Component-SiteCallAudit.md`
|
||||
|
||||
**Step 1: Write the new component doc**
|
||||
|
||||
Create the file following the standard component structure. Content:
|
||||
|
||||
```markdown
|
||||
# Component: Site Call Audit
|
||||
|
||||
## Purpose
|
||||
|
||||
Provides central, queryable audit and operational visibility for cached calls
|
||||
made by site scripts — `ExternalSystem.CachedCall()` and `Database.CachedWrite()`.
|
||||
Each such call carries a `TrackedOperationId`; sites report lifecycle telemetry
|
||||
to this component, which maintains a central audit record, computes KPIs, and
|
||||
relays Retry/Discard actions back to the owning site.
|
||||
|
||||
This is the second centrally-hosted observability component for site
|
||||
store-and-forward activity (the Notification Outbox is the first). Unlike the
|
||||
Notification Outbox, Site Call Audit is **not a dispatcher** — it never delivers
|
||||
anything. Cached calls are delivered by the site's Store-and-Forward Engine
|
||||
against site-local external systems and databases, which central cannot reach.
|
||||
|
||||
## Location
|
||||
|
||||
Central cluster only. A singleton actor (`SiteCallAuditActor`) on the active
|
||||
central node. Registered as component #22 in the Host role configuration.
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- Ingest cached-call lifecycle telemetry from sites into the central `SiteCalls`
|
||||
table.
|
||||
- Run periodic per-site reconciliation pulls so missed telemetry self-heals.
|
||||
- Compute point-in-time KPIs (global and per-site) from the `SiteCalls` table.
|
||||
- Relay operator Retry/Discard actions for parked cached calls to the owning
|
||||
site over the command/control channel.
|
||||
- Purge terminal audit rows after a configurable retention window.
|
||||
|
||||
## The `SiteCalls` Table
|
||||
|
||||
Lives in the central MS SQL configuration database — a sibling of the
|
||||
`Notifications` table. One row per `TrackedOperationId`:
|
||||
|
||||
- **TrackedOperationId** — GUID, primary key. Generated site-side at call time.
|
||||
- **SourceSite** — site that issued the call.
|
||||
- **Kind** — `ExternalCall` or `DatabaseWrite`.
|
||||
- **TargetSummary** — external system + method name, or database connection name.
|
||||
- **Status** — `Pending`, `Retrying`, `Delivered`, `Parked`, `Failed`, `Discarded`.
|
||||
- **RetryCount** — attempts so far.
|
||||
- **LastError** — most recent error detail, if any.
|
||||
- **Provenance** — source instance / script.
|
||||
- **CreatedAtUtc**, **UpdatedAtUtc**, **TerminalAtUtc** — key timestamps.
|
||||
|
||||
## Status Lifecycle
|
||||
|
||||
`Pending → Retrying → Delivered / Parked / Failed / Discarded`
|
||||
|
||||
- **Delivered** — succeeded. A cached call that succeeds on its first immediate
|
||||
attempt is recorded directly as `Delivered`.
|
||||
- **Parked** — transient retries exhausted; awaiting manual action.
|
||||
- **Failed** — permanent failure (e.g. HTTP 4xx). The error was also returned
|
||||
synchronously to the calling script; the record captures it.
|
||||
- **Discarded** — an operator discarded a parked operation.
|
||||
|
||||
The site is the source of truth. The `SiteCalls` row is an eventually-consistent
|
||||
mirror — never queried by scripts (`Tracking.Status()` is answered site-locally).
|
||||
|
||||
## Ingest & Idempotency
|
||||
|
||||
Telemetry ingestion is **insert-if-not-exists** keyed on `TrackedOperationId`,
|
||||
then **upsert-on-newer-status**. The lifecycle is monotonic, so status only
|
||||
advances and never regresses; at-least-once and out-of-order telemetry are
|
||||
therefore harmless.
|
||||
|
||||
## Reconciliation
|
||||
|
||||
Because telemetry is best-effort, `SiteCallAuditActor` periodically — and on site
|
||||
reconnect — pulls "all tracking rows changed since cursor X" from each site.
|
||||
Gaps left by lost telemetry self-heal. Central converges to the site; the site
|
||||
never depends on central.
|
||||
|
||||
## Retry / Discard Relay
|
||||
|
||||
Parked cached calls live in the owning site's S&F buffer. Operator Retry/Discard
|
||||
from the Central UI is relayed to that site as a `RetryParkedOperation` /
|
||||
`DiscardParkedOperation` command over the command/control channel. The site
|
||||
applies the change and emits telemetry reflecting the new state; central never
|
||||
mutates the `SiteCalls` row directly. If the site is offline the command fails
|
||||
fast and the UI surfaces a "site unreachable" message.
|
||||
|
||||
## KPIs
|
||||
|
||||
Point-in-time, computed from the `SiteCalls` table, global and per-source-site,
|
||||
mirroring the Notification Outbox KPI shape:
|
||||
|
||||
- Buffered count (`Pending` + `Retrying`)
|
||||
- Parked count
|
||||
- Failed-last-interval
|
||||
- Delivered-last-interval
|
||||
- Oldest-pending age
|
||||
- Stuck count — `Pending`/`Retrying` older than a configurable threshold
|
||||
(default 10 minutes); display-only, no escalation.
|
||||
|
||||
## Retention
|
||||
|
||||
Daily purge of terminal rows (`Delivered`, `Failed`, `Discarded`) after a
|
||||
configurable window (default 365 days), matching the `Notifications` purge.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **Configuration Database**: hosts the `SiteCalls` table and its repository.
|
||||
- **Central–Site Communication**: receives cached-call telemetry and reconciliation
|
||||
responses; sends Retry/Discard commands.
|
||||
- **Store-and-Forward Engine**: the site-side origin of cached-call telemetry and
|
||||
the executor of relayed Retry/Discard commands.
|
||||
- **Commons**: `TrackedOperationId`, status enum, telemetry message contracts.
|
||||
|
||||
## Interactions
|
||||
|
||||
- **Central UI**: the Site Calls page queries this component and issues
|
||||
Retry/Discard actions.
|
||||
- **Health Monitoring**: surfaces Site Call Audit KPI tiles on the dashboard.
|
||||
- **Cluster Infrastructure**: hosts the `SiteCallAuditActor` singleton with
|
||||
active/standby failover.
|
||||
```
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `git status --short` (the new file is untracked, so `git diff --stat` will not list it) and open the new file.
|
||||
Expected: structure matches other `Component-*.md` files (Purpose → Interactions).
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-SiteCallAudit.md
|
||||
git commit -m "docs(requirements): add Site Call Audit component (#22)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Add shared tracking contracts to Commons
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-Commons.md` — sections `REQ-COM-1` (data types), `REQ-COM-5` (message contracts)
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
In `### REQ-COM-1: Shared Data Type System`, add `TrackedOperationId` as a shared
|
||||
type: a GUID identifying any tracked store-and-forward operation
|
||||
(`CachedCall`, `CachedWrite`, `Notify.Send`), generated caller-side at the site
|
||||
at call time, doubling as the telemetry idempotency key. Note that the existing
|
||||
`NotificationId` is the notification-domain name for this same concept.
|
||||
|
||||
Add a shared `TrackedOperationStatus` enum:
|
||||
`Pending`, `Retrying`, `Delivered`, `Parked`, `Failed`, `Discarded`.
|
||||
|
||||
In `### REQ-COM-5: Cross-Component Message Contracts`, add the cached-call
|
||||
telemetry and command contracts (additive-only, per REQ-COM-5a):
|
||||
- `CachedCallTelemetry` — `TrackedOperationId`, source site, `Kind`,
|
||||
target summary, status, retry count, last error, timestamps, provenance.
|
||||
- `CachedCallReconcileRequest` / `CachedCallReconcileResponse` — cursor-based
|
||||
per-site pull of changed tracking rows.
|
||||
- `RetryParkedOperation` / `DiscardParkedOperation` — central→site commands
|
||||
keyed by `TrackedOperationId` (generalize naming so they cover cached calls,
|
||||
not only legacy "parked message" wording).
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `git diff docs/requirements/Component-Commons.md`
|
||||
Expected: additive only; no existing type or contract removed/renamed.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-Commons.md
|
||||
git commit -m "docs(requirements): add TrackedOperationId and cached-call contracts to Commons"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Update the Store-and-Forward Engine doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-StoreAndForward.md` — `Responsibilities`,
|
||||
`Message Lifecycle`, `Persistence`, `Parked Message Management`, `Message Format`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- **Responsibilities / Persistence**: introduce the **site-local operation
|
||||
tracking table** — a SQLite table alongside the S&F buffer DB, holding one row
|
||||
per `TrackedOperationId` for cached calls regardless of outcome. It is the
|
||||
status record; the S&F buffer remains only the retry mechanism. State that
|
||||
`Tracking.Status(id)` reads this table, that it is the source of truth, and
|
||||
that terminal rows are purged after a configurable window (default 7 days).
|
||||
- **Message Lifecycle**: a cached call that succeeds on its first immediate
|
||||
attempt is written directly as a terminal `Delivered` tracking row and never
|
||||
enters the S&F buffer. A buffered cached-call message references its
|
||||
`TrackedOperationId`.
|
||||
- Add a **telemetry emission** note: on every lifecycle transition the site emits
|
||||
`CachedCallTelemetry` to central (best-effort, at-least-once, idempotent on the
|
||||
ID) and responds to `CachedCallReconcileRequest` pulls.
|
||||
- **Parked Message Management**: note that Retry/Discard of parked cached calls
|
||||
can be driven by central via `RetryParkedOperation`/`DiscardParkedOperation`,
|
||||
after which the site emits telemetry reflecting the new state.
|
||||
- **Message Format**: add `TrackedOperationId` to the listed per-message fields.
|
||||
|
||||
Leave the notification category behavior unchanged.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `git diff docs/requirements/Component-StoreAndForward.md`
|
||||
Expected: cached-call and DB-write categories gain tracking; notification flow untouched.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-StoreAndForward.md
|
||||
git commit -m "docs(requirements): add site-local tracking table and telemetry to Store-and-Forward"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Update the External System Gateway doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-ExternalSystemGateway.md` — `Cached Write`,
|
||||
`External System Call Modes`, `Call Timeout & Error Handling`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- `### Cached (Store-and-Forward)` and `### Cached Write (Store-and-Forward)`:
|
||||
state that `CachedCall`/`CachedWrite` now return a `TrackedOperationId`. They
|
||||
are no longer "fire-and-forget" with no handle — replace that wording with
|
||||
"deferred-delivery, returns a tracking handle". Immediate success → terminal
|
||||
`Delivered` record; transient failure → buffered, `Pending`/`Retrying`.
|
||||
- Permanent failure: the error is still returned synchronously to the script
|
||||
(unchanged) **and** recorded as a terminal `Failed` tracking record.
|
||||
- Keep the idempotency note — duplicate delivery on retry is still the caller's
|
||||
responsibility.
|
||||
- Add a one-line pointer that status is observable via `Tracking.Status(id)` and
|
||||
centrally via the Site Call Audit component.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "fire-and-forget\|TrackedOperationId" docs/requirements/Component-ExternalSystemGateway.md`
|
||||
Expected: "fire-and-forget" no longer describes cached calls; `TrackedOperationId` present.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-ExternalSystemGateway.md
|
||||
git commit -m "docs(requirements): cached calls return TrackedOperationId in ESG"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 5: Update the Site Runtime Script Runtime API
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-SiteRuntime.md` — `### External Systems`,
|
||||
`### Notifications`, `### Database Access` under `## Script Runtime API`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- `### External Systems`: `ExternalSystem.CachedCall(...)` now returns a
|
||||
`TrackedOperationId`; drop "fire-and-forget", say it returns a tracking handle.
|
||||
- `### Database Access`: `Database.CachedWrite(...)` now returns a
|
||||
`TrackedOperationId`.
|
||||
- Add the unified accessor `Tracking.Status("trackedOperationId")` — returns a
|
||||
status record (status, retry count, last error, key timestamps) for any tracked
|
||||
operation, answered site-locally and authoritatively for cached calls.
|
||||
- `### Notifications`: note that `Notify.Status(...)` is retained as a thin alias
|
||||
of `Tracking.Status(...)`; `Notify.Send` returns a `TrackedOperationId`
|
||||
(the value historically called `NotificationId`).
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `git diff docs/requirements/Component-SiteRuntime.md`
|
||||
Expected: all three cached/async producers return `TrackedOperationId`; `Tracking.Status` documented.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-SiteRuntime.md
|
||||
git commit -m "docs(requirements): add Tracking.Status and cached-call handles to Script Runtime API"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 6: Update the Central–Site Communication doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-Communication.md` — `### 8. Remote Queries`,
|
||||
and add a new pattern for cached-call telemetry
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- Add a new communication pattern (e.g. `### 10. Cached Call Telemetry (Site → Central)`):
|
||||
the site S&F Engine pushes `CachedCallTelemetry` on every lifecycle transition;
|
||||
best-effort, at-least-once, idempotent on `TrackedOperationId`; transport is
|
||||
ClusterClient command/control. Also describe the reconciliation pull
|
||||
(`CachedCallReconcileRequest`/`Response`) initiated by `SiteCallAuditActor`.
|
||||
- `### 8. Remote Queries (Central → Site)`: generalize the "Retry or discard
|
||||
parked messages" command line to also cover cached calls keyed by
|
||||
`TrackedOperationId` (`RetryParkedOperation` / `DiscardParkedOperation`).
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "Telemetry\|RetryParkedOperation" docs/requirements/Component-Communication.md`
|
||||
Expected: new telemetry pattern and generalized command present.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-Communication.md
|
||||
git commit -m "docs(requirements): add cached-call telemetry pattern to Communication"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 7: Update the Configuration Database doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-ConfigurationDatabase.md` — `## Database Schema`
|
||||
(add a `### Site Calls` subsection), `## Scheduled Maintenance`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- Under `## Database Schema`, add a `### Site Calls` subsection describing the
|
||||
`SiteCalls` table (columns per Task 1's "The `SiteCalls` Table" list), noting
|
||||
it is populated only by Site Call Audit telemetry/reconciliation, and that
|
||||
ingestion is insert-if-not-exists + upsert-on-newer-status.
|
||||
- Under `## Scheduled Maintenance`, add a `### SiteCalls Table Purge` subsection
|
||||
mirroring the `### Notifications Table Purge` wording: daily purge of terminal
|
||||
rows after a configurable window (default 365 days).
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "SiteCalls" docs/requirements/Component-ConfigurationDatabase.md`
|
||||
Expected: schema subsection and purge subsection both present.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-ConfigurationDatabase.md
|
||||
git commit -m "docs(requirements): add SiteCalls table and purge to Configuration Database"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 8: Update the Central UI doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-CentralUI.md` — `## Workflows / Pages`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
Add a `### Site Calls (Deployment Role)` page after the
|
||||
`### Notification Outbox (Deployment Role)` section:
|
||||
- Queryable list of cached calls (`ExternalCall` + `DatabaseWrite` only —
|
||||
notifications keep their own Notification Outbox page).
|
||||
- Filters: site, kind, status, time range.
|
||||
- Columns: timestamp, site, kind, target summary, status badge, retry count,
|
||||
last error.
|
||||
- Retry / Discard actions on `Parked` rows; "site unreachable" handling when the
|
||||
owning site is offline.
|
||||
- Custom Blazor Server + Bootstrap components, no third-party frameworks.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "Site Calls" docs/requirements/Component-CentralUI.md`
|
||||
Expected: new page section present, scoped to cached calls.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-CentralUI.md
|
||||
git commit -m "docs(requirements): add Site Calls page to Central UI"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 9: Update the Health Monitoring doc
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-HealthMonitoring.md` — add a
|
||||
`## Site Call Audit KPIs` section after `## Notification Outbox KPIs`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
Add a `## Site Call Audit KPIs` section mirroring `## Notification Outbox KPIs`:
|
||||
the dashboard surfaces Site Call Audit headline KPI tiles (buffered, parked,
|
||||
failed-last-interval, delivered-last-interval, oldest-pending age, stuck count),
|
||||
computed point-in-time by the Site Call Audit component, global and per-site.
|
||||
Stuck is display-only.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "Site Call Audit KPIs" docs/requirements/Component-HealthMonitoring.md`
|
||||
Expected: section present.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-HealthMonitoring.md
|
||||
git commit -m "docs(requirements): add Site Call Audit KPIs to Health Monitoring"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 10: Note the shared model in Notification docs
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/requirements/Component-NotificationService.md` — `## Script API`
|
||||
- Modify: `docs/requirements/Component-NotificationOutbox.md` — `## Purpose` or
|
||||
`### Status Lifecycle`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- `Component-NotificationService.md` `## Script API`: note that `Notify.Send`'s
|
||||
`NotificationId` is a `TrackedOperationId` (shared Commons type) and
|
||||
`Notify.Status` is an alias of the unified `Tracking.Status`.
|
||||
- `Component-NotificationOutbox.md`: add a sentence that the Notification Outbox
|
||||
and the Site Call Audit component share the `TrackedOperationId` tracking
|
||||
model and status lifecycle, but differ in delivery locality — the Notification
|
||||
Outbox delivers; Site Call Audit only audits.
|
||||
|
||||
Do not change any notification behavior.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `git diff docs/requirements/Component-NotificationService.md docs/requirements/Component-NotificationOutbox.md`
|
||||
Expected: additive notes only, no behavior change.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/requirements/Component-NotificationService.md docs/requirements/Component-NotificationOutbox.md
|
||||
git commit -m "docs(requirements): note shared TrackedOperationId model in notification docs"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 11: Update the README component table
|
||||
|
||||
**Files:**
|
||||
- Modify: `README.md` — component table and any architecture diagram component count
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
Add row 22 — **Site Call Audit** — to the component table:
|
||||
"Central component auditing site cached calls (`CachedCall`/`CachedWrite`);
|
||||
`SiteCalls` table, telemetry ingest, reconciliation, KPIs, central→site
|
||||
Retry/Discard relay." Update any "21 components" count to 22.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -rn "21 component\|22 component" README.md`
|
||||
Expected: count reads 22; no stale "21".
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add README.md
|
||||
git commit -m "docs: add Site Call Audit to README component table"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 12: Update CLAUDE.md
|
||||
|
||||
**Files:**
|
||||
- Modify: `CLAUDE.md` — `## Current Component List`, `## Key Design Decisions`
|
||||
|
||||
**Step 1: Edit the doc**
|
||||
|
||||
- Change the heading `## Current Component List (21 components)` to `(22 components)`
|
||||
and add item 22 — **Site Call Audit** — with a one-line description.
|
||||
- Under `## Key Design Decisions`, in `### Store-and-Forward` (or `### UI & Monitoring`),
|
||||
add bullets summarizing: cached calls return a `TrackedOperationId`; site-local
|
||||
tracking table is the status source of truth; new central Site Call Audit
|
||||
component mirrors status via best-effort telemetry + reconciliation; cached-call
|
||||
delivery stays site-local; unified `Tracking.Status` accessor; `Failed` terminal
|
||||
state for permanent failures.
|
||||
|
||||
**Step 2: Verify**
|
||||
|
||||
Run: `grep -n "22 components\|Site Call Audit" CLAUDE.md`
|
||||
Expected: count is 22; component listed; design decisions present.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add CLAUDE.md
|
||||
git commit -m "docs: record cached-call tracking in CLAUDE.md"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 13: Final cross-reference consistency pass
|
||||
|
||||
**Files:**
|
||||
- Potentially any `docs/requirements/Component-*.md`, `README.md`, `CLAUDE.md`
|
||||
|
||||
**Step 1: Sweep for stale or missing references**
|
||||
|
||||
Run each and review:
|
||||
```bash
|
||||
grep -rn "fire-and-forget" docs/requirements/
|
||||
grep -rn "21 component" README.md CLAUDE.md
|
||||
grep -rln "Site Call Audit" docs/requirements/ README.md CLAUDE.md
|
||||
grep -rn "TrackedOperationId" docs/requirements/
|
||||
```
|
||||
Expected: no "fire-and-forget" describing cached calls; no "21 component" left;
|
||||
Site Call Audit referenced by its related components (Communication, Configuration
|
||||
Database, Central UI, Health Monitoring, Commons); `TrackedOperationId` used
|
||||
consistently.
|
||||
|
||||
**Step 2: Confirm new component's Dependencies/Interactions are reciprocated**
|
||||
|
||||
Verify each component named in `Component-SiteCallAudit.md` Dependencies/Interactions
|
||||
also references Site Call Audit where appropriate.
|
||||
|
||||
**Step 3: Fix any gaps found, then commit**
|
||||
|
||||
```bash
|
||||
git add -A
|
||||
git commit -m "docs(requirements): reconcile cross-references for Site Call Audit"
|
||||
```
|
||||
|
||||
If no gaps are found, skip the commit and note the plan is complete.
|
||||
|
||||
---
|
||||
|
||||
## Done
|
||||
|
||||
All cached-call tracking design changes are recorded. The design rationale lives
|
||||
in `docs/plans/2026-05-19-cached-call-tracking-design.md`.
|
||||
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-19-cached-call-tracking.md",
|
||||
"tasks": [
|
||||
{"id": 6, "subject": "Task 1: Create Site Call Audit component doc", "status": "pending"},
|
||||
{"id": 7, "subject": "Task 2: Add tracking contracts to Commons", "status": "pending", "blockedBy": [6]},
|
||||
{"id": 8, "subject": "Task 3: Update Store-and-Forward doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 9, "subject": "Task 4: Update External System Gateway doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 10, "subject": "Task 5: Update Site Runtime Script Runtime API", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 11, "subject": "Task 6: Update Communication doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 12, "subject": "Task 7: Update Configuration Database doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 13, "subject": "Task 8: Update Central UI doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 14, "subject": "Task 9: Update Health Monitoring doc", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 15, "subject": "Task 10: Note shared model in notification docs", "status": "pending", "blockedBy": [6, 7]},
|
||||
{"id": 16, "subject": "Task 11: Update README component table", "status": "pending", "blockedBy": [6]},
|
||||
{"id": 17, "subject": "Task 12: Update CLAUDE.md", "status": "pending", "blockedBy": [6]},
|
||||
{"id": 18, "subject": "Task 13: Final cross-reference consistency pass", "status": "pending", "blockedBy": [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]}
|
||||
],
|
||||
"lastUpdated": "2026-05-19"
|
||||
}
|
||||
@@ -0,0 +1,717 @@
|
||||
# Notification Outbox — Code Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Build the central Notification Outbox feature in the ScadaLink `src/` codebase — sites store-and-forward notifications to the central cluster, which logs each to a `Notifications` table and delivers it via per-type adapters with retry, parking, status handles, and KPIs.
|
||||
|
||||
**Architecture:** A new `ScadaLink.NotificationOutbox` project hosts a `NotificationOutboxActor` cluster singleton on the active central node. Sites enqueue notifications into the existing site Store-and-Forward Engine (notification category, retargeted from SMTP to "central"); the S&F engine forwards them to central via `ClusterClient`; the `CentralCommunicationActor` routes each `NotificationSubmit` to the outbox singleton, which inserts a row into the central MS SQL `Notifications` table (insert-if-not-exists on a site-generated `NotificationId` GUID) and acks. A timer-driven dispatcher polls due rows and delivers them through an `INotificationDeliveryAdapter` (Email adapter now; Teams later). A Blazor page surfaces KPIs and a queryable list with Retry/Discard.
|
||||
|
||||
**Tech Stack:** .NET 10, Akka.NET (cluster singletons, ClusterClient, TestKit), EF Core (MS SQL; SQLite in-memory for tests), Blazor Server + Bootstrap, xUnit + NSubstitute + bUnit. Solution: `ScadaLink.slnx`.
|
||||
|
||||
**Authoritative design:** `docs/plans/notif.md` and `docs/requirements/Component-NotificationOutbox.md`. Read both before starting.
|
||||
|
||||
---
|
||||
|
||||
## Conventions (read once, applies to every task)
|
||||
|
||||
These were confirmed by exploring the existing codebase. Follow them in every task.
|
||||
|
||||
- **Entities (Commons):** POCOs in `src/ScadaLink.Commons/Entities/<Area>/`. Auto-properties, parameterized constructor with null checks, navigation collections initialised to `new List<T>()`. No data annotations.
|
||||
- **EF mapping (ConfigurationDatabase):** Fluent `IEntityTypeConfiguration<T>` classes in `src/ScadaLink.ConfigurationDatabase/Configurations/`, auto-applied by `ApplyConfigurationsFromAssembly`. Enums stored as strings via `.HasConversion<string>()`. Add a `DbSet<T>` to `ScadaLinkDbContext`.
|
||||
- **Repositories:** Interface in `src/ScadaLink.Commons/Interfaces/Repositories/`, implementation in `src/ScadaLink.ConfigurationDatabase/Repositories/`. Inject `ScadaLinkDbContext`, use `_context.Set<T>()`, expose explicit `SaveChangesAsync`. Register in `ConfigurationDatabase/ServiceCollectionExtensions.cs` with `AddScoped`.
|
||||
- **Migrations:** `dotnet ef migrations add <Name> --project src/ScadaLink.ConfigurationDatabase` — timestamp-named. Applied via `MigrationHelper.ApplyOrValidateMigrationsAsync` (auto in dev).
|
||||
- **Message contracts (Commons):** `record` types in `src/ScadaLink.Commons/Messages/<Area>/`, named positional params, additive-only evolution.
|
||||
- **Options pattern:** `<Component>Options` class owned by the component project; component's `ServiceCollectionExtensions.Add<Component>()` calls `services.AddOptions<T>().BindConfiguration("ScadaLink:<Section>")`; Host also `services.Configure<T>(...)`. Config lives in `appsettings.Central.json` / `appsettings.Site.json`.
|
||||
- **Actors:** No Akka.DI framework. Dependencies passed via `Props.Create(() => new XActor(...))`. Actors that need scoped services take `IServiceProvider` and call `CreateScope()`. Cluster singletons use `ClusterSingletonManager.Props` + `ClusterSingletonProxy.Props`, created in `src/ScadaLink.Host/Actors/AkkaHostedService.cs`.
|
||||
- **Tests:** xUnit, NSubstitute, built-in `Assert`. One `tests/ScadaLink.<Component>.Tests/` project per `src/` project. Actor tests inherit `Akka.TestKit.Xunit2.TestKit`. Repository tests use SQLite in-memory (`DataSource=:memory:`, `OpenConnection()` + `EnsureCreated()`, `IDisposable`). Blazor tests inherit bUnit `BunitContext`. Test naming: `Method_Scenario_Result`.
|
||||
- **Run tests:** whole suite `dotnet test ScadaLink.slnx`; single project `dotnet test tests/ScadaLink.<X>.Tests/ScadaLink.<X>.Tests.csproj`; single test `--filter "FullyQualifiedName~<Class>.<Method>"`.
|
||||
- **Build:** `dotnet build ScadaLink.slnx`.
|
||||
- **TDD:** every task writes the failing test first, runs it red, implements, runs it green, commits. Use the superpowers-extended-cc:test-driven-development discipline.
|
||||
- **Commits:** one per task, message `feat(notification-outbox): <task summary>`.
|
||||
|
||||
**Status lifecycle** (central `Notifications` table — `Forwarding` is site-local, never stored centrally):
|
||||
`Pending → Retrying → Delivered | Parked`, plus `Discarded` (operator action only).
|
||||
|
||||
---
|
||||
|
||||
## Phase A — Data layer (Commons + ConfigurationDatabase)
|
||||
|
||||
### Task 1: Notification enums
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.Commons/Types/Enums/NotificationType.cs`
|
||||
- Create: `src/ScadaLink.Commons/Types/Enums/NotificationStatus.cs`
|
||||
- Test: `tests/ScadaLink.Commons.Tests/Types/NotificationEnumTests.cs` (create the `Types/` folder if the test project lacks one)
|
||||
|
||||
**Step 1 — failing test.** Assert the enums expose exactly the expected members:
|
||||
```csharp
|
||||
[Fact]
|
||||
public void NotificationStatus_HasExactlyTheCentralStates()
|
||||
{
|
||||
var names = Enum.GetNames<NotificationStatus>();
|
||||
Assert.Equal(
|
||||
new[] { "Pending", "Retrying", "Delivered", "Parked", "Discarded" },
|
||||
names);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void NotificationType_HasEmail()
|
||||
{
|
||||
Assert.True(Enum.IsDefined(NotificationType.Email));
|
||||
}
|
||||
```
|
||||
Note: `Forwarding` is intentionally NOT a `NotificationStatus` member — it is a site-local concept (Task 19), never persisted centrally.
|
||||
|
||||
**Step 2 — run red:** `dotnet test tests/ScadaLink.Commons.Tests/ScadaLink.Commons.Tests.csproj --filter "FullyQualifiedName~NotificationEnumTests"` → FAIL (types don't exist).
|
||||
|
||||
**Step 3 — implement.**
|
||||
```csharp
|
||||
// NotificationType.cs — namespace ScadaLink.Commons.Types.Enums
|
||||
public enum NotificationType { Email } // Teams and others added later
|
||||
|
||||
// NotificationStatus.cs — namespace ScadaLink.Commons.Types.Enums
|
||||
public enum NotificationStatus { Pending, Retrying, Delivered, Parked, Discarded }
|
||||
```
|
||||
|
||||
**Step 4 — run green.** Same filter → PASS.
|
||||
|
||||
**Step 5 — commit:**
|
||||
```bash
|
||||
git add src/ScadaLink.Commons/Types/Enums/NotificationType.cs src/ScadaLink.Commons/Types/Enums/NotificationStatus.cs tests/ScadaLink.Commons.Tests/Types/NotificationEnumTests.cs
|
||||
git commit -m "feat(notification-outbox): add NotificationType and NotificationStatus enums"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: `Notification` entity POCO
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.Commons/Entities/Notifications/Notification.cs`
|
||||
- Test: `tests/ScadaLink.Commons.Tests/Entities/NotificationEntityTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Verify the constructor sets required fields, defaults `Status` to `Pending` and `RetryCount` to 0, and rejects nulls:
|
||||
```csharp
|
||||
[Fact]
|
||||
public void Constructor_SetsDefaults()
|
||||
{
|
||||
var n = new Notification("id-1", NotificationType.Email, "ops-team", "subj", "body", "SiteA");
|
||||
Assert.Equal(NotificationStatus.Pending, n.Status);
|
||||
Assert.Equal(0, n.RetryCount);
|
||||
Assert.Equal("id-1", n.NotificationId);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Constructor_NullListName_Throws()
|
||||
=> Assert.Throws<ArgumentNullException>(
|
||||
() => new Notification("id", NotificationType.Email, null!, "s", "b", "SiteA"));
|
||||
```
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Match the `Notifications` table schema in `notif.md`:
|
||||
```csharp
|
||||
namespace ScadaLink.Commons.Entities.Notifications;
|
||||
|
||||
public class Notification
|
||||
{
|
||||
public string NotificationId { get; set; } // GUID PK, generated at site
|
||||
public NotificationType Type { get; set; }
|
||||
public string ListName { get; set; }
|
||||
public string Subject { get; set; }
|
||||
public string Body { get; set; }
|
||||
public string? TypeData { get; set; } // JSON extensibility hook
|
||||
public NotificationStatus Status { get; set; } = NotificationStatus.Pending;
|
||||
public int RetryCount { get; set; }
|
||||
public string? LastError { get; set; }
|
||||
public string? ResolvedTargets { get; set; } // snapshotted at delivery, for audit
|
||||
public string SourceSiteId { get; set; }
|
||||
public string? SourceInstanceId { get; set; }
|
||||
public string? SourceScript { get; set; }
|
||||
public DateTimeOffset SiteEnqueuedAt { get; set; }
|
||||
public DateTimeOffset CreatedAt { get; set; } // central ingest time
|
||||
public DateTimeOffset? LastAttemptAt { get; set; }
|
||||
public DateTimeOffset? NextAttemptAt { get; set; }
|
||||
public DateTimeOffset? DeliveredAt { get; set; }
|
||||
|
||||
public Notification(string notificationId, NotificationType type, string listName,
|
||||
string subject, string body, string sourceSiteId)
|
||||
{
|
||||
NotificationId = notificationId ?? throw new ArgumentNullException(nameof(notificationId));
|
||||
Type = type;
|
||||
ListName = listName ?? throw new ArgumentNullException(nameof(listName));
|
||||
Subject = subject ?? throw new ArgumentNullException(nameof(subject));
|
||||
Body = body ?? throw new ArgumentNullException(nameof(body));
|
||||
SourceSiteId = sourceSiteId ?? throw new ArgumentNullException(nameof(sourceSiteId));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add Notification entity`).
|
||||
|
||||
---
|
||||
|
||||
### Task 3: `Type` field on `NotificationList`
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.Commons/Entities/Notifications/NotificationList.cs`
|
||||
- Modify: `src/ScadaLink.ConfigurationDatabase/Configurations/NotificationConfiguration.cs` (`NotificationListConfiguration`)
|
||||
- Test: `tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryTests.cs` (add a test to the notification repository tests)
|
||||
|
||||
**Step 1 — failing test.** A `NotificationList` round-trips its `Type` through the repository:
|
||||
```csharp
|
||||
[Fact]
|
||||
public async Task NotificationList_PersistsType()
|
||||
{
|
||||
var list = new NotificationList("ops") { Type = NotificationType.Email };
|
||||
await _notificationRepo.AddNotificationListAsync(list);
|
||||
await _notificationRepo.SaveChangesAsync();
|
||||
_context.ChangeTracker.Clear();
|
||||
var loaded = await _notificationRepo.GetListByNameAsync("ops");
|
||||
Assert.Equal(NotificationType.Email, loaded!.Type);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Add to `NotificationList`: `public NotificationType Type { get; set; } = NotificationType.Email;`. In `NotificationListConfiguration.Configure`, add `builder.Property(n => n.Type).HasConversion<string>().HasMaxLength(32).IsRequired();`.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add Type field to NotificationList`).
|
||||
|
||||
---
|
||||
|
||||
### Task 4: `Notification` EF configuration + DbSet
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.ConfigurationDatabase/Configurations/NotificationOutboxConfiguration.cs`
|
||||
- Modify: `src/ScadaLink.ConfigurationDatabase/ScadaLinkDbContext.cs` (add `DbSet<Notification>`)
|
||||
- Test: `tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryTests.cs`
|
||||
|
||||
**Step 1 — failing test.** A `Notification` round-trips all fields through the `DbContext` (use the SQLite in-memory fixture pattern). Assert the `Status`/`Type` enums persist as strings and the row is found by `NotificationId`.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Configuration:
|
||||
```csharp
|
||||
public class NotificationOutboxConfiguration : IEntityTypeConfiguration<Notification>
|
||||
{
|
||||
public void Configure(EntityTypeBuilder<Notification> builder)
|
||||
{
|
||||
builder.ToTable("Notifications");
|
||||
builder.HasKey(n => n.NotificationId);
|
||||
builder.Property(n => n.NotificationId).HasMaxLength(64);
|
||||
builder.Property(n => n.Type).HasConversion<string>().HasMaxLength(32).IsRequired();
|
||||
builder.Property(n => n.Status).HasConversion<string>().HasMaxLength(32).IsRequired();
|
||||
builder.Property(n => n.ListName).HasMaxLength(200).IsRequired();
|
||||
builder.Property(n => n.Subject).HasMaxLength(1000).IsRequired();
|
||||
builder.Property(n => n.Body).IsRequired(); // nvarchar(max)
|
||||
builder.Property(n => n.TypeData); // nvarchar(max), nullable
|
||||
builder.Property(n => n.ResolvedTargets); // nvarchar(max), nullable
|
||||
builder.Property(n => n.LastError).HasMaxLength(4000);
|
||||
builder.Property(n => n.SourceSiteId).HasMaxLength(100).IsRequired();
|
||||
builder.Property(n => n.SourceInstanceId).HasMaxLength(200);
|
||||
builder.Property(n => n.SourceScript).HasMaxLength(200);
|
||||
builder.HasIndex(n => new { n.Status, n.NextAttemptAt }); // dispatcher polling
|
||||
builder.HasIndex(n => new { n.SourceSiteId, n.CreatedAt }); // KPIs / UI query
|
||||
}
|
||||
}
|
||||
```
|
||||
Add `public DbSet<Notification> Notifications => Set<Notification>();` to `ScadaLinkDbContext`.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add Notification EF configuration and DbSet`).
|
||||
|
||||
---
|
||||
|
||||
### Task 5: `INotificationOutboxRepository` + implementation
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.Commons/Interfaces/Repositories/INotificationOutboxRepository.cs`
|
||||
- Create: `src/ScadaLink.ConfigurationDatabase/Repositories/NotificationOutboxRepository.cs`
|
||||
- Modify: `src/ScadaLink.ConfigurationDatabase/ServiceCollectionExtensions.cs` (register `AddScoped`)
|
||||
- Test: `tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryTests.cs`
|
||||
|
||||
**Step 1 — failing tests.** Cover the operations the outbox actor needs:
|
||||
- `InsertIfNotExistsAsync` inserts a new row and returns `true`; a second call with the same `NotificationId` returns `false` and does not duplicate (idempotency key).
|
||||
- `GetDueAsync(now, batchSize)` returns `Pending` rows and `Retrying` rows with `NextAttemptAt <= now`, ordered by `CreatedAt`, capped at `batchSize`.
|
||||
- `UpdateAsync` persists status transitions.
|
||||
- `GetByIdAsync` returns a row or null.
|
||||
- `QueryAsync(filter, page, pageSize)` filters by status/type/source site and paginates.
|
||||
- `DeleteTerminalOlderThanAsync(cutoff)` bulk-deletes `Delivered`/`Parked`/`Discarded` rows older than `cutoff` and returns the count; leaves non-terminal rows.
|
||||
- `ComputeKpisAsync` returns queue depth, stuck count, parked count, delivered-last-window, oldest-pending age.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Interface:
|
||||
```csharp
|
||||
public interface INotificationOutboxRepository
|
||||
{
|
||||
Task<bool> InsertIfNotExistsAsync(Notification n, CancellationToken ct = default);
|
||||
Task<IReadOnlyList<Notification>> GetDueAsync(DateTimeOffset now, int batchSize, CancellationToken ct = default);
|
||||
Task<Notification?> GetByIdAsync(string notificationId, CancellationToken ct = default);
|
||||
Task UpdateAsync(Notification n, CancellationToken ct = default);
|
||||
Task<(IReadOnlyList<Notification> Rows, int TotalCount)> QueryAsync(
|
||||
NotificationOutboxFilter filter, int pageNumber, int pageSize, CancellationToken ct = default);
|
||||
Task<int> DeleteTerminalOlderThanAsync(DateTimeOffset cutoff, CancellationToken ct = default);
|
||||
Task<NotificationKpiSnapshot> ComputeKpisAsync(DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken ct = default);
|
||||
Task<int> SaveChangesAsync(CancellationToken ct = default);
|
||||
}
|
||||
```
|
||||
`NotificationOutboxFilter` (a `record` in Commons `Types/`) and `NotificationKpiSnapshot` (a `record`) are created in this task alongside the interface. `InsertIfNotExistsAsync`: check `await _context.Notifications.FindAsync(...)`, if present return false, else `AddAsync` + `SaveChangesAsync`, return true. `DeleteTerminalOlderThanAsync`: use `ExecuteDeleteAsync` with a `Where` on terminal statuses and `CreatedAt < cutoff`. Register in `ServiceCollectionExtensions.AddConfigurationDatabase`.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add NotificationOutbox repository`).
|
||||
|
||||
---
|
||||
|
||||
### Task 6: EF migration
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.ConfigurationDatabase/Migrations/<timestamp>_AddNotificationsTable.cs` (generated)
|
||||
|
||||
**Step 1 — generate:**
|
||||
```bash
|
||||
dotnet ef migrations add AddNotificationsTable --project src/ScadaLink.ConfigurationDatabase
|
||||
```
|
||||
This also picks up the `NotificationList.Type` column from Task 3.
|
||||
|
||||
**Step 2 — verify.** Inspect the generated migration: confirm a `Notifications` table with the columns and two indexes from Task 4, and an `AddColumn` for `NotificationLists.Type` (check that the default applied to existing rows is a valid `NotificationType` value such as `Email`). Run the ConfigurationDatabase test project — the SQLite `EnsureCreated()` fixture builds from the model, and `dotnet build ScadaLink.slnx` must succeed.
|
||||
|
||||
**Step 3 — run:** `dotnet test tests/ScadaLink.ConfigurationDatabase.Tests/ScadaLink.ConfigurationDatabase.Tests.csproj` → PASS.
|
||||
|
||||
**Step 4 — commit** (`feat(notification-outbox): add Notifications table migration`).
|
||||
|
||||
---
|
||||
|
||||
## Phase B — Message contracts (Commons)
|
||||
|
||||
### Task 7: Site↔central notification message contracts
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.Commons/Messages/Notification/NotificationMessages.cs`
|
||||
- Test: `tests/ScadaLink.Commons.Tests/Messages/NotificationMessagesTests.cs`
|
||||
|
||||
**Step 1 — failing test.** A trivial construction/round-trip test (these are records — assert positional construction and value equality; if the project has a serialization test helper, round-trip through it).
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Namespace `ScadaLink.Commons.Messages.Notification`:
|
||||
```csharp
|
||||
// Site → Central: submit a notification for central delivery (fire-and-forget with ack).
|
||||
public record NotificationSubmit(
|
||||
string NotificationId, string ListName, string Subject, string Body,
|
||||
string SourceSiteId, string? SourceInstanceId, string? SourceScript,
|
||||
DateTimeOffset SiteEnqueuedAt);
|
||||
|
||||
// Central → Site: ack after the row is persisted (idempotent — safe to re-send).
|
||||
public record NotificationSubmitAck(string NotificationId, bool Accepted, string? Error);
|
||||
|
||||
// Site → Central: query delivery status for a NotificationId.
|
||||
public record NotificationStatusQuery(string CorrelationId, string NotificationId);
|
||||
|
||||
public record NotificationStatusResponse(
|
||||
string CorrelationId, bool Found, string Status,
|
||||
int RetryCount, string? LastError,
|
||||
DateTimeOffset? DeliveredAt);
|
||||
```
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add site/central notification message contracts`).
|
||||
|
||||
---
|
||||
|
||||
### Task 8: Outbox UI query/action contracts
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.Commons/Messages/Notification/NotificationOutboxQueries.cs`
|
||||
- Test: `tests/ScadaLink.Commons.Tests/Messages/NotificationOutboxQueriesTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Construction test as in Task 7.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Records the Central UI / `CommunicationService` use to talk to the outbox actor:
|
||||
```csharp
|
||||
public record NotificationOutboxQueryRequest(
|
||||
string CorrelationId, string? StatusFilter, string? TypeFilter, string? SourceSiteFilter,
|
||||
string? ListNameFilter, bool StuckOnly, string? SubjectKeyword,
|
||||
DateTimeOffset? From, DateTimeOffset? To, int PageNumber, int PageSize);
|
||||
|
||||
public record NotificationSummary(
|
||||
string NotificationId, string Type, string ListName, string Subject, string Status,
|
||||
int RetryCount, string? LastError, string SourceSiteId, string? SourceInstanceId,
|
||||
DateTimeOffset CreatedAt, DateTimeOffset? DeliveredAt, bool IsStuck);
|
||||
|
||||
public record NotificationOutboxQueryResponse(
|
||||
string CorrelationId, bool Success, string? ErrorMessage,
|
||||
IReadOnlyList<NotificationSummary> Notifications, int TotalCount);
|
||||
|
||||
public record RetryNotificationRequest(string CorrelationId, string NotificationId);
|
||||
public record RetryNotificationResponse(string CorrelationId, bool Success, string? ErrorMessage);
|
||||
public record DiscardNotificationRequest(string CorrelationId, string NotificationId);
|
||||
public record DiscardNotificationResponse(string CorrelationId, bool Success, string? ErrorMessage);
|
||||
|
||||
public record NotificationKpiRequest(string CorrelationId);
|
||||
public record NotificationKpiResponse(
|
||||
string CorrelationId, int QueueDepth, int StuckCount, int ParkedCount,
|
||||
int DeliveredLastInterval, TimeSpan? OldestPendingAge);
|
||||
```
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add outbox query and action contracts`).
|
||||
|
||||
---
|
||||
|
||||
## Phase C — NotificationOutbox project + delivery
|
||||
|
||||
### Task 9: Create the `ScadaLink.NotificationOutbox` project
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/ScadaLink.NotificationOutbox.csproj`
|
||||
- Create: `tests/ScadaLink.NotificationOutbox.Tests/ScadaLink.NotificationOutbox.Tests.csproj`
|
||||
- Modify: `ScadaLink.slnx` (add both projects)
|
||||
|
||||
**Step 1 — create the projects.** Copy the `.csproj` shape from `src/ScadaLink.NotificationService/ScadaLink.NotificationService.csproj` (same `TargetFramework`, central-managed package versions via `Directory.Packages.props`). The src project references `ScadaLink.Commons` and Akka packages (`Akka`, `Akka.Cluster.Tools`). The test project mirrors `tests/ScadaLink.NotificationService.Tests/` (xUnit, NSubstitute, `Akka.TestKit.Xunit2`) and references the new src project. Add both `<Project>` entries to `ScadaLink.slnx`.
|
||||
|
||||
**Step 2 — add a placeholder test** so the test project is non-empty:
|
||||
```csharp
|
||||
public class ProjectSmokeTest { [Fact] public void ProjectCompiles() => Assert.True(true); }
|
||||
```
|
||||
|
||||
**Step 3 — verify:** `dotnet build ScadaLink.slnx` succeeds; `dotnet test tests/ScadaLink.NotificationOutbox.Tests/ScadaLink.NotificationOutbox.Tests.csproj` → PASS.
|
||||
|
||||
**Step 4 — commit** (`feat(notification-outbox): scaffold ScadaLink.NotificationOutbox project`).
|
||||
|
||||
---
|
||||
|
||||
### Task 10: `NotificationOutboxOptions`
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/NotificationOutboxOptions.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxOptionsTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Assert the defaults.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.**
|
||||
```csharp
|
||||
public class NotificationOutboxOptions
|
||||
{
|
||||
public TimeSpan DispatchInterval { get; set; } = TimeSpan.FromSeconds(10);
|
||||
public int DispatchBatchSize { get; set; } = 100;
|
||||
public TimeSpan StuckAgeThreshold { get; set; } = TimeSpan.FromMinutes(10);
|
||||
public TimeSpan TerminalRetention { get; set; } = TimeSpan.FromDays(365);
|
||||
public TimeSpan PurgeInterval { get; set; } = TimeSpan.FromDays(1);
|
||||
public TimeSpan DeliveredKpiWindow { get; set; } = TimeSpan.FromMinutes(1);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add NotificationOutboxOptions`).
|
||||
|
||||
---
|
||||
|
||||
### Task 11: `INotificationDeliveryAdapter` + `DeliveryOutcome`
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/Delivery/INotificationDeliveryAdapter.cs`
|
||||
- Create: `src/ScadaLink.NotificationOutbox/Delivery/DeliveryOutcome.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/Delivery/DeliveryOutcomeTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Assert `DeliveryOutcome` factory methods produce the right classification.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Mirror the External System Gateway error-classification pattern:
|
||||
```csharp
|
||||
public enum DeliveryResult { Success, TransientFailure, PermanentFailure }
|
||||
|
||||
public record DeliveryOutcome(DeliveryResult Result, string? ResolvedTargets, string? Error)
|
||||
{
|
||||
public static DeliveryOutcome Success(string resolvedTargets) => new(DeliveryResult.Success, resolvedTargets, null);
|
||||
public static DeliveryOutcome Transient(string error) => new(DeliveryResult.TransientFailure, null, error);
|
||||
public static DeliveryOutcome Permanent(string error) => new(DeliveryResult.PermanentFailure, null, error);
|
||||
}
|
||||
|
||||
public interface INotificationDeliveryAdapter
|
||||
{
|
||||
NotificationType Type { get; }
|
||||
Task<DeliveryOutcome> DeliverAsync(Notification notification, CancellationToken ct = default);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add delivery adapter abstraction`).
|
||||
|
||||
---
|
||||
|
||||
### Task 12: `EmailNotificationDeliveryAdapter`
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs`
|
||||
- Modify: `src/ScadaLink.NotificationOutbox/ScadaLink.NotificationOutbox.csproj` (reference `ScadaLink.NotificationService` for `ISmtpClientWrapper`)
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/Delivery/EmailNotificationDeliveryAdapterTests.cs`
|
||||
|
||||
**Step 1 — failing tests.** Using an NSubstitute mock of `INotificationRepository` (supplying the list, its recipients, and the SMTP configuration) and a substituted `ISmtpClientWrapper`:
|
||||
- list resolved + send succeeds → `DeliveryResult.Success`, `ResolvedTargets` lists the recipient addresses.
|
||||
- list not found / no recipients → `PermanentFailure`.
|
||||
- SMTP throws `SmtpPermanentException` → `PermanentFailure`.
|
||||
- SMTP throws a transient error (socket/timeout) → `TransientFailure`.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** The adapter resolves the list + recipients + SMTP config from `INotificationRepository` (the existing notification-list repo — recipients are resolved centrally at delivery time), composes and sends via the existing `ISmtpClientWrapper` (`Func<ISmtpClientWrapper>` injected, same as `NotificationService`), classifies errors identically to `NotificationDeliveryService`. Reuse the SMTP composition logic from `src/ScadaLink.NotificationService/NotificationDeliveryService.cs` (BCC delivery, plain text, address validation, the `SmtpPermanentException` → permanent mapping). On success return `DeliveryOutcome.Success(<comma-joined recipient addresses>)`. `Type => NotificationType.Email`.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add Email delivery adapter`).
|
||||
|
||||
---
|
||||
|
||||
### Task 13: `NotificationOutboxActor` — ingest
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs`
|
||||
- Create: `src/ScadaLink.NotificationOutbox/Messages/InternalMessages.cs` (actor-internal tick messages)
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorIngestTests.cs`
|
||||
|
||||
**Step 1 — failing tests** (TestKit). The actor takes `IServiceProvider`, `NotificationOutboxOptions`, `ILogger`. Use a mocked `INotificationOutboxRepository` registered in the test `ServiceProvider`:
|
||||
- Send `NotificationSubmit` → actor calls `InsertIfNotExistsAsync` with a `Notification` whose fields map from the message, `Status = Pending`, `CreatedAt` set; replies `NotificationSubmitAck(NotificationId, Accepted: true, null)` to `Sender`.
|
||||
- Send the same `NotificationSubmit` twice → second `InsertIfNotExistsAsync` returns false; actor still replies `Accepted: true` (idempotent — the row already exists, ack so the site clears its buffer).
|
||||
- Repository throws → actor replies `Accepted: false` with the error (site will retry the forward).
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** `ReceiveActor`. On `NotificationSubmit`: build a `Notification`, `CreateScope()` to resolve `INotificationOutboxRepository`, call `InsertIfNotExistsAsync`, `PipeTo` the result back so the reply preserves `Sender`. Reply `NotificationSubmitAck`. Keep dispatch (Task 14) out of this task — ingest only.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add NotificationOutboxActor ingest`).
|
||||
|
||||
---
|
||||
|
||||
### Task 14: `NotificationOutboxActor` — dispatcher loop
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs`
|
||||
|
||||
**Step 1 — failing tests** (TestKit, with a registered set of `INotificationDeliveryAdapter` and a mocked repo):
|
||||
- On a `DispatchTick`, the actor calls `GetDueAsync`, and for each row invokes the adapter for its `Type`.
|
||||
- adapter `Success` → row updated to `Delivered`, `DeliveredAt`/`ResolvedTargets`/`LastAttemptAt` set, `UpdateAsync` called.
|
||||
- adapter `TransientFailure` → `Retrying`, `RetryCount` incremented, `NextAttemptAt = now + retry interval`, `LastError` set.
|
||||
- adapter `TransientFailure` when `RetryCount` already at the SMTP-config max → `Parked`.
|
||||
- adapter `PermanentFailure` → `Parked` immediately, `LastError` set.
|
||||
- no adapter for the row's `Type` → `Parked` with an explanatory error.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** `IWithTimers`; in `PreStart` start a periodic `DispatchTick` every `options.DispatchInterval`. On `DispatchTick`: scope-resolve the repo, `GetDueAsync(now, options.DispatchBatchSize)`, and for each notification resolve the adapter from the injected `IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter>` (the registration added in Task 17), `await DeliverAsync`, apply the status transition, `UpdateAsync`. Retry count/interval come from the central SMTP config (`SmtpConfiguration.MaxRetries` / `RetryDelay` via `INotificationRepository`). Run delivery off the actor thread: start the async work and `PipeTo` its completion back to the actor; never block the actor thread. Guard against overlapping ticks (ignore a new tick while one is in flight).
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add dispatcher loop to NotificationOutboxActor`).
|
||||
|
||||
---
|
||||
|
||||
### Task 15: `NotificationOutboxActor` — query, retry, discard, KPIs
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorQueryTests.cs`
|
||||
|
||||
**Step 1 — failing tests** (TestKit):
|
||||
- `NotificationOutboxQueryRequest` → actor calls `QueryAsync`, replies `NotificationOutboxQueryResponse` with mapped `NotificationSummary` rows; `IsStuck` is true when `Status` is `Pending`/`Retrying` and `CreatedAt` is earlier than now − `options.StuckAgeThreshold`.
|
||||
- `NotificationStatusQuery` → replies `NotificationStatusResponse` (`Found:false` when the id is unknown).
|
||||
- `RetryNotificationRequest` on a `Parked` row → row reset to `Pending`, `RetryCount` 0, `NextAttemptAt` cleared; replies success. On a non-`Parked` row → `Success:false`.
|
||||
- `DiscardNotificationRequest` on a `Parked` row → `Status = Discarded`; replies success.
|
||||
- `NotificationKpiRequest` → replies `NotificationKpiResponse` from `ComputeKpisAsync` (stuck cutoff = now − `StuckAgeThreshold`; delivered window = now − `DeliveredKpiWindow`).
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement** the additional `Receive<>` handlers, each scope-resolving the repo and `PipeTo`-ing the reply.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add query, retry, discard, and KPI handlers`).
|
||||
|
||||
---
|
||||
|
||||
### Task 16: Daily purge job
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/NotificationOutboxActorPurgeTests.cs`
|
||||
|
||||
**Step 1 — failing test.** On a `PurgeTick`, the actor calls `DeleteTerminalOlderThanAsync(now − options.TerminalRetention)`.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** In `PreStart` start a second periodic timer `PurgeTick` every `options.PurgeInterval`. Handler scope-resolves the repo and calls `DeleteTerminalOlderThanAsync`; log the deleted count.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add daily terminal-row purge`).
|
||||
|
||||
---
|
||||
|
||||
### Task 17: `AddNotificationOutbox` DI extension
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.NotificationOutbox/ServiceCollectionExtensions.cs`
|
||||
- Test: `tests/ScadaLink.NotificationOutbox.Tests/ServiceRegistrationTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Build a `ServiceCollection`, call `AddNotificationOutbox`, and assert `NotificationOutboxOptions`, the `EmailNotificationDeliveryAdapter`, and the adapter dictionary resolve.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** `public const string OptionsSection = "ScadaLink:NotificationOutbox";` plus `AddNotificationOutbox(this IServiceCollection)` registering `AddOptions<NotificationOutboxOptions>().BindConfiguration(OptionsSection)`, the SMTP client `Func<ISmtpClientWrapper>` (reuse `NotificationService`'s registration or register here), `EmailNotificationDeliveryAdapter`, and a registration that exposes `IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter>` built from all registered adapters.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add AddNotificationOutbox DI registration`).
|
||||
|
||||
---
|
||||
|
||||
## Phase D — Site retarget + central wiring
|
||||
|
||||
### Task 18: Retarget the site S&F notification handler to forward to central
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.StoreAndForward/StoreAndForwardService.cs` and/or the site registration that wires the `Notification` category delivery handler
|
||||
- Modify: `src/ScadaLink.Host/SiteServiceRegistration.cs` (where the notification handler is registered)
|
||||
- Test: `tests/ScadaLink.StoreAndForward.Tests/` (a test that the registered notification handler forwards to the communication actor and treats an ack as success)
|
||||
|
||||
**Step 1 — investigate + failing test.** Currently the `Notification` category handler calls `NotificationDeliveryService.DeliverBufferedAsync`. The new handler must instead send a `NotificationSubmit` to central via the site's communication actor (`ClusterClient.Send("/user/central-communication", submit)`) and treat a `NotificationSubmitAck(Accepted:true)` as delivered (`true`), a non-ack/timeout as transient (throw), so S&F retries the forward. Write a test with a `TestProbe` standing in for the central client: handler invoked → probe receives `NotificationSubmit`; reply `NotificationSubmitAck(Accepted:true)` → handler result `true`; timeout → handler throws (transient).
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Add a `NotificationForwarder` (small class or the handler lambda) that holds the site communication actor ref and does `Ask<NotificationSubmitAck>` with the host-configured forward-retry timeout. Register it as the `StoreAndForwardCategory.Notification` delivery handler in `SiteServiceRegistration`, replacing the `NotificationDeliveryService` handler. The S&F engine already buffers/retries on a thrown (transient) result — no S&F core change needed.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): forward site S&F notifications to central`).
|
||||
|
||||
---
|
||||
|
||||
### Task 19: `Notify.Send` async + `Notify.Status` (SiteRuntime)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.SiteRuntime/Scripts/ScriptRuntimeContext.cs` (`NotifyHelper`, `NotifyTarget`)
|
||||
- Test: `tests/ScadaLink.SiteRuntime.Tests/` (Notify API tests)
|
||||
|
||||
**Step 1 — failing tests.**
|
||||
- `Notify.To("list").Send("subj","body")` generates a GUID `NotificationId`, enqueues a `StoreAndForwardCategory.Notification` message into `StoreAndForwardService` (target `"central"`, payload = serialized `NotificationSubmit`), and returns the `NotificationId` string immediately.
|
||||
- `Notify.Status(id)` issues a `NotificationStatusQuery` to central and returns the mapped status record; while the notification is still in the site S&F buffer (central has no row / query says `Found:false` but the S&F buffer still holds the id) it reports `Forwarding`.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Change `NotifyTarget.Send` to return `Task<string>` (the `NotificationId`): create the GUID, build a `NotificationSubmit` (with `SourceSiteId`, `SourceInstanceId = _instanceName`, `SiteEnqueuedAt = UtcNow`), `EnqueueAsync(Notification, "central", payloadJson)`. Add `NotifyHelper.Status(string notificationId)` returning a status record: query central via the site communication actor; if central returns `Found:false` and the id is still buffered in S&F, return status `Forwarding`. Keep the script-facing surface minimal (`Send`, `Status`).
|
||||
|
||||
**Step 3 note:** the `Notify` API is consumed by compiled scripts — confirm the script trust model / compilation still accepts the changed signature; update any script-API surface tests.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): async Notify.Send with status handle`).
|
||||
|
||||
---
|
||||
|
||||
### Task 20: Central ingest routing
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs`
|
||||
- Test: `tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs`
|
||||
|
||||
**Step 1 — failing test.** When `CentralCommunicationActor` receives a `NotificationSubmit` (sent site→central via ClusterClient to `/user/central-communication`), it forwards it to the notification-outbox singleton proxy and the ack flows back to the original `Sender`. Use a `TestProbe` for the outbox proxy.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** `CentralCommunicationActor` takes an optional outbox-proxy `IActorRef` (passed at construction by the Host, Task 21). `Receive<NotificationSubmit>(m => _outboxProxy.Forward(m))` — `Forward` preserves the original sender so the `NotificationSubmitAck` returns to the site's ClusterClient.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): route NotificationSubmit to the outbox actor`).
|
||||
|
||||
---
|
||||
|
||||
### Task 21: Host registration + appsettings
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.Host/Actors/AkkaHostedService.cs` (`RegisterCentralActors`)
|
||||
- Modify: `src/ScadaLink.Host/Program.cs` (call `AddNotificationOutbox`; `Configure<NotificationOutboxOptions>`)
|
||||
- Modify: `src/ScadaLink.Host/appsettings.Central.json` (`ScadaLink:NotificationOutbox` section)
|
||||
- Modify: `src/ScadaLink.Host/appsettings.Site.json` (site→central notification forward-retry interval, if not already covered by S&F config)
|
||||
- Test: `tests/ScadaLink.Host.Tests/` if present, else verify via build + the integration test in Task 25
|
||||
|
||||
**Step 1 — implement.** In `RegisterCentralActors`: create the `NotificationOutboxActor` as a **cluster singleton** (`ClusterSingletonManager.Props` + `ClusterSingletonProxy.Props`, singleton name `"notification-outbox"`, no explicit role — central nodes only run this role), passing `IServiceProvider`, `NotificationOutboxOptions`, the adapter dictionary, and a logger. Pass the singleton **proxy** ref into `CentralCommunicationActor`'s `Props.Create`. In `Program.cs` central path, call `builder.Services.AddNotificationOutbox()` and `builder.Services.Configure<NotificationOutboxOptions>(...GetSection(ServiceCollectionExtensions.OptionsSection))`. Add the `ScadaLink:NotificationOutbox` block to `appsettings.Central.json` with the Task 10 defaults.
|
||||
|
||||
**Step 2 — verify:** `dotnet build ScadaLink.slnx` succeeds.
|
||||
|
||||
**Step 3 — commit** (`feat(notification-outbox): register NotificationOutbox singleton in Host`).
|
||||
|
||||
---
|
||||
|
||||
## Phase E — Central UI
|
||||
|
||||
### Task 22: `CommunicationService` outbox methods
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.Communication/CommunicationService.cs`
|
||||
- Test: `tests/ScadaLink.Communication.Tests/CommunicationServiceTests.cs` (or the existing service test file)
|
||||
|
||||
**Step 1 — failing tests.** New methods `QueryNotificationOutboxAsync`, `RetryNotificationAsync`, `DiscardNotificationAsync`, `GetNotificationKpisAsync` each `Ask` the central outbox proxy and return the typed response. (These are central-side and do not go through `SiteEnvelope` — they talk to the local outbox proxy directly.) Test with a `TestProbe` for the proxy.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Add an outbox-proxy `IActorRef` to `CommunicationService` (set by the Host like `SetCommunicationActor`). Each method `Ask<TResponse>(request, _options.QueryTimeout)`.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add CommunicationService outbox methods`).
|
||||
|
||||
---
|
||||
|
||||
### Task 23: Notification Outbox Blazor page + nav entry
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ScadaLink.CentralUI/Components/Pages/Monitoring/NotificationOutbox.razor`
|
||||
- Modify: `src/ScadaLink.CentralUI/Components/Layout/NavMenu.razor`
|
||||
- Test: `tests/ScadaLink.CentralUI.Tests/Pages/NotificationOutboxPageTests.cs` (bUnit)
|
||||
|
||||
**Step 1 — failing test** (bUnit). Render the page with a substituted `CommunicationService` returning a fixed KPI response and a page of `NotificationSummary` rows; assert the KPI tiles show the values and the table renders the rows; assert clicking Retry on a `Parked` row calls `RetryNotificationAsync`.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Model the page on `Components/Pages/Monitoring/ParkedMessages.razor`: `@page "/monitoring/notification-outbox"`, `@attribute [Authorize(Policy = AuthorizationPolicies.RequireDeployment)]`. KPI tile row (Bootstrap `card` tiles like `Health.razor`) bound to `GetNotificationKpisAsync`; a filter card (status, type, source site, list, time range, stuck-only toggle, subject keyword); a table of `NotificationSummary` with stuck rows badged; Retry/Discard buttons on `Parked` rows using `IDialogService.ConfirmAsync` + `ToastNotification`. Add a `NavLink` to `NavMenu.razor` under the Deployment-role Monitoring section (`href="/monitoring/notification-outbox"`).
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add Notification Outbox UI page`).
|
||||
|
||||
---
|
||||
|
||||
### Task 24: Health dashboard outbox KPI tiles
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor`
|
||||
- Test: `tests/ScadaLink.CentralUI.Tests/Pages/HealthPageTests.cs` (extend if present)
|
||||
|
||||
**Step 1 — failing test** (bUnit). With a substituted `CommunicationService.GetNotificationKpisAsync`, the Health page renders three headline outbox tiles: queue depth, stuck count, parked count.
|
||||
|
||||
**Step 2 — run red.**
|
||||
|
||||
**Step 3 — implement.** Add a "Notification Outbox" tile row to `Health.razor`, fetched on init / on the existing 10s polling timer, styled like the existing overview cards.
|
||||
|
||||
**Step 4 — run green. Step 5 — commit** (`feat(notification-outbox): add outbox KPI tiles to Health dashboard`).
|
||||
|
||||
---
|
||||
|
||||
## Phase F — Integration & verification
|
||||
|
||||
### Task 25: End-to-end integration test
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/ScadaLink.IntegrationTests/NotificationOutboxFlowTests.cs`
|
||||
|
||||
**Step 1 — failing test.** Following the patterns in `tests/ScadaLink.IntegrationTests/`, exercise the flow with an in-memory/SQLite-backed `ScadaLinkDbContext` and a real `NotificationOutboxActor`: submit a `NotificationSubmit` → assert a `Notifications` row exists (`Pending`) → trigger a `DispatchTick` with a stub adapter that returns `Success` → assert the row is `Delivered`. Add a second case: stub adapter returns `PermanentFailure` → row `Parked`; then a `RetryNotificationRequest` → row back to `Pending`.
|
||||
|
||||
**Step 2 — run red. Step 3 — make it pass** (it should, if Phases A–D are correct; fix any wiring gaps found). **Step 4 — commit** (`test(notification-outbox): end-to-end outbox flow integration test`).
|
||||
|
||||
---
|
||||
|
||||
### Task 26: Full build + suite verification
|
||||
|
||||
**Files:** none (verification only).
|
||||
|
||||
**Step 1:** `dotnet build ScadaLink.slnx` → must succeed with no errors.
|
||||
|
||||
**Step 2:** `dotnet test ScadaLink.slnx` → the whole suite must pass. Investigate and fix any regressions (notably in `ScadaLink.NotificationService.Tests`, `ScadaLink.StoreAndForward.Tests`, `ScadaLink.SiteRuntime.Tests`, `ScadaLink.Communication.Tests` — the docs/design changed the notification path and existing tests may assert old behavior; update them to the new design).
|
||||
|
||||
**Step 3:** If the Docker cluster is used for smoke testing, note that `bash docker/deploy.sh` rebuilds the image — this is out of scope for this plan unless the user asks.
|
||||
|
||||
**Step 4 — commit** any test fixes (`test(notification-outbox): update existing tests for the central-delivery model`).
|
||||
|
||||
---
|
||||
|
||||
## Follow-ups (post-merge, not blocking)
|
||||
|
||||
- **Remove the now-dead site-side `AddNotificationService()` (from Task 19 review).** After Task 19, the site script runtime no longer resolves `INotificationDeliveryService` (it enqueues into the Store-and-Forward engine instead). `src/ScadaLink.Host/SiteServiceRegistration.cs` still calls `AddNotificationService()`. Task 21 (Host registration) should drop it from the site path — `NotificationService` is now central-only.
|
||||
- **Re-align the Central UI script sandbox `Notify` API (from Task 19 review).** `SandboxNotifyTarget.Send` in `src/ScadaLink.CentralUI/ScriptAnalysis/` still returns `Task<NotificationResult>` and has no `Status` method, while the production `NotifyTarget.Send` now returns `Task<string>` plus `Notify.Status`. A script that test-runs cleanly in the sandbox would not compile against the real runtime. The sandbox `Notify` surface should be rewritten to match production so the test-run feature stays faithful.
|
||||
- **Populate `SourceScript` on outbound notifications (from Task 19 review).** `NotifyTarget.Send` currently passes `SourceScript: null` — the executing script name is not threaded down to the `NotifyHelper`. The payload field and the forwarder already carry it end to end; only the enqueue side needs the wiring.
|
||||
- **Share the SMTP helpers (from Task 12 review).** `EmailNotificationDeliveryAdapter` reimplements `ClassifySmtpError`/`SmtpErrorClass`, `ValidateAddresses`, and a `ScrubCredentials` helper because the originals are `internal` to `ScadaLink.NotificationService`. To avoid divergence (especially in the security-relevant credential redaction and the SMTP 4xx/5xx classification policy), promote `CredentialRedactor` to `public`, extract a `public static SmtpErrorClassifier`, and make `ValidateAddresses` shared — then have the adapter call them and delete the duplicates. The project reference already exists, so this is low-cost.
|
||||
|
||||
## Done
|
||||
|
||||
The Notification Outbox feature is implemented end to end: site scripts enqueue notifications that store-and-forward to central, the `NotificationOutboxActor` singleton ingests them into the `Notifications` table and delivers them via the Email adapter with retry/parking, operators see KPIs and manage notifications from the Central UI, and the full test suite passes. Teams and other delivery adapters can be added later by implementing `INotificationDeliveryAdapter` and registering it — no other change required.
|
||||
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-19-notification-outbox-implementation.md",
|
||||
"tasks": [
|
||||
{"id": 18, "subject": "Task 1: Notification enums", "status": "pending"},
|
||||
{"id": 19, "subject": "Task 2: Notification entity POCO", "status": "pending", "blockedBy": [18]},
|
||||
{"id": 20, "subject": "Task 3: Type field on NotificationList", "status": "pending", "blockedBy": [19]},
|
||||
{"id": 21, "subject": "Task 4: Notification EF configuration + DbSet", "status": "pending", "blockedBy": [20]},
|
||||
{"id": 22, "subject": "Task 5: NotificationOutbox repository", "status": "pending", "blockedBy": [21]},
|
||||
{"id": 23, "subject": "Task 6: EF migration AddNotificationsTable", "status": "pending", "blockedBy": [22]},
|
||||
{"id": 24, "subject": "Task 7: Site/central notification message contracts", "status": "pending", "blockedBy": [23]},
|
||||
{"id": 25, "subject": "Task 8: Outbox query/action contracts", "status": "pending", "blockedBy": [24]},
|
||||
{"id": 26, "subject": "Task 9: Scaffold ScadaLink.NotificationOutbox project", "status": "pending", "blockedBy": [25]},
|
||||
{"id": 27, "subject": "Task 10: NotificationOutboxOptions", "status": "pending", "blockedBy": [26]},
|
||||
{"id": 28, "subject": "Task 11: Delivery adapter abstraction", "status": "pending", "blockedBy": [27]},
|
||||
{"id": 29, "subject": "Task 12: Email delivery adapter", "status": "pending", "blockedBy": [28]},
|
||||
{"id": 30, "subject": "Task 13: NotificationOutboxActor ingest", "status": "pending", "blockedBy": [29]},
|
||||
{"id": 31, "subject": "Task 14: Dispatcher loop", "status": "pending", "blockedBy": [30]},
|
||||
{"id": 32, "subject": "Task 15: Query, retry, discard, KPI handlers", "status": "pending", "blockedBy": [31]},
|
||||
{"id": 33, "subject": "Task 16: Daily purge job", "status": "pending", "blockedBy": [32]},
|
||||
{"id": 34, "subject": "Task 17: AddNotificationOutbox DI extension", "status": "pending", "blockedBy": [33]},
|
||||
{"id": 35, "subject": "Task 18: Retarget site S&F notification handler to central", "status": "pending", "blockedBy": [34]},
|
||||
{"id": 36, "subject": "Task 19: Async Notify.Send + Notify.Status", "status": "pending", "blockedBy": [35]},
|
||||
{"id": 37, "subject": "Task 20: Central ingest routing", "status": "pending", "blockedBy": [36]},
|
||||
{"id": 38, "subject": "Task 21: Host registration + appsettings", "status": "pending", "blockedBy": [37]},
|
||||
{"id": 39, "subject": "Task 22: CommunicationService outbox methods", "status": "pending", "blockedBy": [38]},
|
||||
{"id": 40, "subject": "Task 23: Notification Outbox Blazor page", "status": "pending", "blockedBy": [39]},
|
||||
{"id": 41, "subject": "Task 24: Health dashboard outbox KPI tiles", "status": "pending", "blockedBy": [40]},
|
||||
{"id": 42, "subject": "Task 25: End-to-end integration test", "status": "pending", "blockedBy": [41]},
|
||||
{"id": 43, "subject": "Task 26: Full build + suite verification", "status": "pending", "blockedBy": [42]}
|
||||
],
|
||||
"lastUpdated": "2026-05-19"
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
# Notifications Nav Group — Design
|
||||
|
||||
**Date:** 2026-05-19
|
||||
|
||||
**Goal:** Consolidate all notification-related Central UI pages into a dedicated
|
||||
**Notifications** left-menu section, split the combined Outbox page into a report
|
||||
and a KPIs page, give Notification Lists a proper home, and add a per-source-site
|
||||
KPI breakdown.
|
||||
|
||||
## Background
|
||||
|
||||
Notification-related UI is currently scattered:
|
||||
|
||||
| Page | Route | Nav section | Policy |
|
||||
|---|---|---|---|
|
||||
| SMTP Configuration | `/admin/smtp` | Admin | RequireAdmin |
|
||||
| Notification Outbox (KPI tiles **+** filterable table) | `/monitoring/notification-outbox` | Monitoring | RequireDeployment |
|
||||
| Notification Lists | `/design/notification-lists/...` (form only) | none — table embedded in the External Systems page | RequireDesign |
|
||||
|
||||
The Outbox page mixes KPI tiles and the filterable `Notifications`-table report on
|
||||
one page. Notification Lists has no list page of its own — its table is bolted
|
||||
onto `ExternalSystems.razor`. KPI infrastructure
|
||||
(`NotificationKpiRequest`/`Response`, `INotificationOutboxRepository.ComputeKpisAsync`)
|
||||
is global-only, despite CLAUDE.md stating KPIs are "global + per-source-site".
|
||||
|
||||
## Architecture
|
||||
|
||||
A new **Notifications** left-menu section consolidates these pages. Routes move
|
||||
to a consistent `/notifications/*` prefix. The combined Outbox page is split into
|
||||
two. Notification Lists gets a dedicated page. A bounded backend addition supplies
|
||||
per-source-site KPIs. No actor topology, persistence, or message-evolution rules
|
||||
change beyond the additive KPI contracts.
|
||||
|
||||
## 1. Nav menu
|
||||
|
||||
New `Notifications` section in `NavMenu.razor`, placed **between Deployment and
|
||||
Monitoring**. Final section order: Dashboard, Admin, Design, Deployment,
|
||||
Notifications, Monitoring, Audit Log.
|
||||
|
||||
| Menu item | Route | Policy |
|
||||
|---|---|---|
|
||||
| SMTP Configuration | `/notifications/smtp` | RequireAdmin |
|
||||
| Notification Lists | `/notifications/lists` | RequireDesign |
|
||||
| Notification Report | `/notifications/report` | RequireDeployment |
|
||||
| Notification KPIs | `/notifications/kpis` | RequireDeployment |
|
||||
|
||||
Each item is wrapped in its own per-item `AuthorizeView` policy (same pattern the
|
||||
Monitoring section already uses for its mixed-role items). The section header is a
|
||||
plain `div` — every authenticated user holds at least one of Admin/Design/Deployment,
|
||||
so the header always has ≥1 visible child and cannot be orphaned.
|
||||
|
||||
SMTP Configuration is **removed** from the Admin section; Notification Outbox is
|
||||
**removed** from the Monitoring section.
|
||||
|
||||
## 2. SMTP Configuration
|
||||
|
||||
Move `Components/Pages/Admin/SmtpConfiguration.razor` →
|
||||
`Components/Pages/Notifications/SmtpConfiguration.razor`. Route `/admin/smtp` →
|
||||
`/notifications/smtp`. Page content, `RequireAdmin` policy, and the
|
||||
`SmtpConfiguration` namespace alias are unchanged.
|
||||
|
||||
## 3. Notification Lists (new page)
|
||||
|
||||
New `Components/Pages/Notifications/NotificationLists.razor`
|
||||
(`/notifications/lists`, RequireDesign): a `DataTable` of notification lists with
|
||||
Add and per-row Edit actions, plus an empty state — extracted verbatim from the
|
||||
notification-lists block currently in `ExternalSystems.razor`.
|
||||
|
||||
- `NotificationListForm.razor` routes move:
|
||||
`/design/notification-lists/create` → `/notifications/lists/create`,
|
||||
`/design/notification-lists/{Id:int}/edit` → `/notifications/lists/{Id:int}/edit`.
|
||||
Its "Back" navigation targets `/notifications/lists`.
|
||||
- The notification-lists section is **removed** from `ExternalSystems.razor`,
|
||||
leaving that page purely external systems. The three `/design/notification-lists/...`
|
||||
navigate-links in `ExternalSystems.razor` are removed with it.
|
||||
|
||||
## 4. Notification Report
|
||||
|
||||
New `Components/Pages/Notifications/NotificationReport.razor`
|
||||
(`/notifications/report`, RequireDeployment), split from the existing
|
||||
`Monitoring/NotificationOutbox.razor`. Retains the full filter bar, the paginated
|
||||
`Notifications`-table query (`NotificationOutboxQueryRequest`), and the per-row
|
||||
Retry/Discard actions. The **KPI tile row is removed** from this page.
|
||||
|
||||
`Components/Pages/Monitoring/NotificationOutbox.razor` and its Monitoring nav entry
|
||||
are **deleted**.
|
||||
|
||||
## 5. Notification KPIs
|
||||
|
||||
New `Components/Pages/Notifications/NotificationKpis.razor`
|
||||
(`/notifications/kpis`, RequireDeployment) with a manual Refresh button. Two parts:
|
||||
|
||||
1. **Global tiles** — the existing five: Queue Depth, Stuck, Parked, Delivered Last
|
||||
Interval, Oldest Pending Age.
|
||||
2. **Per-source-site breakdown table** — one row per site with the same five
|
||||
metrics, so operators can see which site is backing up.
|
||||
|
||||
### Backend addition for per-site KPIs
|
||||
|
||||
Bounded, additive, follows the existing global-KPI pattern:
|
||||
|
||||
- `INotificationOutboxRepository.ComputePerSiteKpisAsync(...)` → returns a
|
||||
per-site collection (a new `SiteNotificationKpiSnapshot` record carrying the
|
||||
source site id plus the five metrics). Implemented in
|
||||
`NotificationOutboxRepository`.
|
||||
- New message pair in `Messages/Notification/NotificationOutboxQueries.cs`:
|
||||
`PerSiteNotificationKpiRequest` / `PerSiteNotificationKpiResponse` (additive —
|
||||
honors message-evolution rules).
|
||||
- A handler in `NotificationOutboxActor` for the new request, mirroring the
|
||||
existing `NotificationKpiRequest` handler.
|
||||
- A `CommunicationService.GetPerSiteNotificationKpisAsync(...)` method mirroring
|
||||
`GetNotificationKpisAsync`.
|
||||
|
||||
Per CLAUDE.md, KPIs remain point-in-time computed from the `Notifications` table —
|
||||
no time-series store, no historical charts (YAGNI).
|
||||
|
||||
## 6. Health dashboard
|
||||
|
||||
`Monitoring/Health.razor` keeps its KPI tile row unchanged. A "View details →"
|
||||
link is added from that tile row to `/notifications/kpis`.
|
||||
|
||||
## Error handling
|
||||
|
||||
Unchanged from the current Outbox page: KPI/query faults surface as an inline
|
||||
warning alert (`Success == false` → `ErrorMessage`); the site-name lookup degrades
|
||||
gracefully to raw site ids. Per-site KPI faults are reported the same way.
|
||||
|
||||
## Testing
|
||||
|
||||
- bUnit component tests for `NotificationLists`, `NotificationReport`,
|
||||
`NotificationKpis`, and the moved `SmtpConfiguration` page.
|
||||
- A `NavMenu` test asserting the Notifications section renders and that per-item
|
||||
visibility honors Admin/Design/Deployment roles.
|
||||
- Repository tests for `ComputePerSiteKpisAsync`.
|
||||
- Actor test for the `PerSiteNotificationKpiRequest` handler.
|
||||
- `CommunicationService` test for `GetPerSiteNotificationKpisAsync`.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Historical/trend KPI charts (no time-series store).
|
||||
- Any change to notification delivery, store-and-forward, or the `Notifications`
|
||||
table schema.
|
||||
- Renaming the Notification Outbox **component** (#21) — only the UI page names change.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-19-notifications-nav-group.md",
|
||||
"tasks": [
|
||||
{"id": 53, "subject": "Task 1: Per-site KPI domain type + repo contract", "status": "completed"},
|
||||
{"id": 54, "subject": "Task 2: ComputePerSiteKpisAsync repository impl", "status": "completed", "blockedBy": [53]},
|
||||
{"id": 55, "subject": "Task 3: Per-site KPI message contracts", "status": "completed", "blockedBy": [53]},
|
||||
{"id": 56, "subject": "Task 4: Actor per-site KPI handler", "status": "completed", "blockedBy": [54, 55]},
|
||||
{"id": 57, "subject": "Task 5: CommunicationService per-site KPI accessor", "status": "completed", "blockedBy": [56]},
|
||||
{"id": 58, "subject": "Task 6: Move SMTP page to /notifications/smtp", "status": "completed"},
|
||||
{"id": 59, "subject": "Task 7: New Notification Lists page", "status": "completed"},
|
||||
{"id": 60, "subject": "Task 8: Move list form route; drop External Systems tab", "status": "completed", "blockedBy": [59]},
|
||||
{"id": 61, "subject": "Task 9: New Notification Report page; retire Outbox page", "status": "completed"},
|
||||
{"id": 62, "subject": "Task 10: New Notification KPIs page", "status": "completed", "blockedBy": [57]},
|
||||
{"id": 63, "subject": "Task 11: NavMenu Notifications section", "status": "completed", "blockedBy": [58, 59, 61, 62]},
|
||||
{"id": 64, "subject": "Task 12: Health dashboard KPI page link", "status": "completed", "blockedBy": [62]},
|
||||
{"id": 65, "subject": "Task 13: Full build + suite verification", "status": "completed", "blockedBy": [53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64]}
|
||||
],
|
||||
"lastUpdated": "2026-05-19"
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
# Design: Notification Outbox
|
||||
|
||||
**Date:** 2026-05-18
|
||||
**Status:** Basic design — approved, open for refinement.
|
||||
|
||||
## Problem
|
||||
|
||||
Notification delivery today happens at the site clusters: scripts call `Notify.To().Send()`,
|
||||
the Notification Service composes an email, and the site sends it via SMTP. The Store-and-Forward
|
||||
Engine buffers transient failures. Two gaps motivated this design:
|
||||
|
||||
1. **No audit trail.** A successful send is recorded nowhere. A permanently-failed send is
|
||||
returned to the script and then lost. Only a transiently-failed-and-buffered notification
|
||||
is visible — indirectly, as Store-and-Forward activity.
|
||||
2. **No monitoring.** There is no view of delivery health: no KPIs, and no way to find
|
||||
notifications that are stuck or have been parked.
|
||||
|
||||
## Solution overview
|
||||
|
||||
Invert where delivery happens. Sites no longer send notifications directly. Instead:
|
||||
|
||||
- A site script's notification is **store-and-forwarded to the central cluster**.
|
||||
- Central **logs every notification to a `Notifications` table** in the central config DB
|
||||
(MS SQL) — the single source of audit truth.
|
||||
- A central **Notification Outbox** dispatches and delivers from that table, with retry,
|
||||
parking, per-notification status, and KPIs.
|
||||
|
||||
The `Notifications` table is type-agnostic so it can record any notification type the system
|
||||
supports — email today, Microsoft Teams and others later.
|
||||
|
||||
### End-to-end flow
|
||||
|
||||
```
|
||||
Site script: Notify.To("list").Send(subject, body)
|
||||
│ generate NotificationId (GUID) locally; return it to the script immediately
|
||||
▼
|
||||
Site Store-and-Forward Engine (notification category, target = central)
|
||||
│ durably forwards to central via the Communication Layer (ClusterClient);
|
||||
│ buffers/retries if central is unreachable
|
||||
▼
|
||||
Central ingest: insert-if-not-exists on NotificationId → Notifications table (Pending)
|
||||
│ ack the site → site S&F clears the message
|
||||
▼
|
||||
Central Notification Outbox actor (singleton, active central node)
|
||||
│ polls due rows; resolves the list; delivers via the matching adapter
|
||||
├── success → Delivered
|
||||
├── transient failure → Retrying (schedule NextAttemptAt)
|
||||
└── permanent failure
|
||||
/ retries exhausted → Parked
|
||||
```
|
||||
|
||||
`Notify.Status(notificationId)` returns a small **status record** — status, retry count,
|
||||
last error, and key timestamps (enqueued, delivered). While the notification is still in the
|
||||
site S&F buffer the site answers the query **locally** (status `Forwarding`); once forwarded,
|
||||
the query round-trips to central and reads the `Notifications` table.
|
||||
|
||||
## Component design
|
||||
|
||||
### New component #21: Notification Outbox
|
||||
|
||||
A **central** component — the first outbox to live centrally (the Store-and-Forward Engine
|
||||
remains site-only).
|
||||
|
||||
- **Location:** Central cluster.
|
||||
- **Actor:** `NotificationOutboxActor` — a **singleton on the active central node**.
|
||||
- **Owns:** the durable central queue (the `Notifications` table), the dispatcher loop,
|
||||
retry scheduling, parking, per-notification status tracking, and KPI computation.
|
||||
- SMTP/HTTP delivery is blocking I/O — delivery work runs on a **dedicated blocking-I/O
|
||||
dispatcher** (same pattern as Script Execution Actors).
|
||||
|
||||
### Notification Service (revised)
|
||||
|
||||
Shrinks to two clear jobs, both **central-only**:
|
||||
|
||||
- Manage **notification-list and SMTP definitions** in the config DB.
|
||||
- Provide **delivery adapters** — stateless "deliver one notification" implementations per
|
||||
type (see below).
|
||||
|
||||
Notification lists and SMTP config are **no longer deployed to sites**. Sites never talk to SMTP.
|
||||
|
||||
### Store-and-Forward Engine (revised)
|
||||
|
||||
Keeps its notification category, but the delivery *target* changes from SMTP to **central**.
|
||||
"Delivering" a buffered notification now means handing it to the Communication Layer for the
|
||||
central cluster and clearing it on central's ack. The site→central forward uses a fixed
|
||||
retry interval configured in the host `appsettings.json` — it concerns reaching the central
|
||||
cluster rather than any notification list.
|
||||
|
||||
## Typed notification lists
|
||||
|
||||
Each notification list gains a **`Type`** field plus type-specific targets:
|
||||
|
||||
- `Email` — a set of recipient addresses (implemented now).
|
||||
- `Teams`, others — future types.
|
||||
|
||||
`Notify.To("list")` works transparently for any type — the script does not care. Lists are
|
||||
defined and stored centrally only.
|
||||
|
||||
**Recipient resolution happens at central, at delivery time** — the site forwards only
|
||||
`(listName, subject, body)`. This keeps definitions in one place and removes the deploy-to-sites
|
||||
artifact entirely.
|
||||
|
||||
## The `Notifications` table (central MS SQL)
|
||||
|
||||
Type-agnostic. One row per notification.
|
||||
|
||||
| Field | Notes |
|
||||
|---|---|
|
||||
| `NotificationId` | GUID, primary key. Generated at the **site**; used as the idempotency key. |
|
||||
| `Type` | `Email` / `Teams` / … discriminator. |
|
||||
| `ListName` | Target notification list. |
|
||||
| `Subject`, `Body` | Plain-text content. |
|
||||
| `TypeData` | JSON — extensibility hook for future per-type fields. |
|
||||
| `Status` | `Pending` → `Retrying` → `Delivered` / `Parked` / `Discarded`. |
|
||||
| `RetryCount` | Delivery attempts so far. |
|
||||
| `LastError` | Detail of the most recent failure. |
|
||||
| `ResolvedTargets` | Who the notification actually went to — snapshotted by central at delivery time, for audit. |
|
||||
| `SourceSiteId`, `SourceInstanceId`, `SourceScript` | Provenance. |
|
||||
| `SiteEnqueuedAt` | When the script called `Send()` (carried from the site). |
|
||||
| `CreatedAt` | When central ingested the row. |
|
||||
| `LastAttemptAt`, `NextAttemptAt`, `DeliveredAt` | Delivery timestamps. |
|
||||
|
||||
All timestamps are UTC.
|
||||
|
||||
### Status lifecycle
|
||||
|
||||
- `Forwarding` — in the site S&F buffer, not yet received by central. **Site-local only** —
|
||||
never stored in the central `Notifications` table; reported by `Notify.Status` while the
|
||||
site still holds the notification.
|
||||
- `Pending` — ingested by central, awaiting first dispatch.
|
||||
- `Retrying` — a transient failure occurred; `NextAttemptAt` schedules the next attempt.
|
||||
- `Delivered` — terminal, success.
|
||||
- `Parked` — terminal-not-delivered: a permanent failure, or retries exhausted. `LastError`
|
||||
distinguishes which.
|
||||
- `Discarded` — terminal, reached **only by operator action** on a parked notification. The
|
||||
row is kept (not deleted) so the table remains a complete audit record.
|
||||
|
||||
### Retry policy
|
||||
|
||||
Delivery retry reuses the central SMTP configuration's max-retry-count and fixed retry
|
||||
interval — consistent with the existing fixed-interval (no backoff) convention.
|
||||
|
||||
### Retention
|
||||
|
||||
Terminal rows (`Delivered`, `Parked`, `Discarded`) are removed by a **daily purge job** after
|
||||
a configurable window (default ~1 year). This preserves a strong audit trail while bounding
|
||||
table growth. Non-terminal rows are never purged.
|
||||
|
||||
## Delivery adapters
|
||||
|
||||
An `INotificationDeliveryAdapter` is registered per `Type`. Each `Deliver(...)` call returns
|
||||
one of `success | transient failure | permanent failure`, mirroring the External System
|
||||
Gateway error-classification pattern.
|
||||
|
||||
- **Email adapter — implemented now.** The existing SMTP composition/send logic, relocated
|
||||
to the central cluster.
|
||||
- **Teams and other adapters — future.** The `Type` discriminator and the adapter interface
|
||||
are the seam; no Teams code is written in this basic plan. Teams auth and targeting
|
||||
(Incoming Webhooks vs Graph API) is a separate design conversation.
|
||||
|
||||
## Active/standby behavior
|
||||
|
||||
The `NotificationOutboxActor` is a singleton on the active central node. All outbox state
|
||||
lives in MS SQL, which is already the central HA store — so no Akka-level replication is
|
||||
needed (unlike the site S&F engine). On central failover the new active node resumes
|
||||
dispatch directly from the table.
|
||||
|
||||
The site→central handoff is **at-least-once**: central acks only after the row is persisted,
|
||||
and a lost ack causes the site to resend. The GUID `NotificationId` idempotency key makes a
|
||||
resend harmless (insert-if-not-exists). A rare failover mid-delivery could re-send one
|
||||
notification that had been sent but not yet marked `Delivered` — an accepted trade-off, consistent with the duplicate-delivery
|
||||
trade-off the Store-and-Forward Engine already accepts.
|
||||
|
||||
## Monitoring
|
||||
|
||||
### KPIs
|
||||
|
||||
Central-computed from the `Notifications` table — global, with a per-source-site breakdown:
|
||||
|
||||
- **Queue depth** — count of `Pending` + `Retrying`.
|
||||
- **Stuck count** — `Pending`/`Retrying` rows older than a configurable age threshold
|
||||
(default 10 minutes).
|
||||
- **Parked count** — count of `Parked`.
|
||||
- **Delivered (last interval)** — count of `Delivered` since the previous sample.
|
||||
- **Oldest pending age** — age of the oldest non-terminal notification.
|
||||
|
||||
### Stuck detection
|
||||
|
||||
A notification is **stuck** if it is `Pending` or `Retrying` and older than the configurable
|
||||
age threshold. Detection is **display-only** — a count KPI and a row badge. No automated
|
||||
escalation or alerting, consistent with the current system-wide no-alerting policy.
|
||||
|
||||
### Surfacing
|
||||
|
||||
- **Health Monitoring dashboard** — headline KPI tiles: queue depth, stuck count, parked
|
||||
count. These are central-computed (not part of the site health report). The site S&F
|
||||
notification backlog remains a separate site health metric, covering the site→central leg.
|
||||
- **New Central UI "Notification Outbox" page** — KPI tiles plus a queryable notification
|
||||
list: filter by status, type, source site, list, and time range; a stuck-only toggle;
|
||||
keyword search on subject. Parked notifications offer **Retry** (→ `Pending`, reset
|
||||
`RetryCount`/`NextAttemptAt`) and **Discard** (→ `Discarded`) actions. Stuck rows are badged.
|
||||
|
||||
## Cross-document impact
|
||||
|
||||
| Document | Change |
|
||||
|---|---|
|
||||
| `Component-NotificationOutbox.md` | **New** — component #21. |
|
||||
| `Component-NotificationService.md` | Delivery moves central; lists gain a `Type`; no deploy-to-sites; async script API; delivery adapters. |
|
||||
| `Component-StoreAndForward.md` | Notification category retargeted from SMTP to central. |
|
||||
| `Component-HealthMonitoring.md` | Outbox KPIs added as central-computed headline metrics. |
|
||||
| `Component-SiteEventLogging.md` | New Notification event category — logs site→central forward failures and long-buffered notifications. |
|
||||
| `Component-CentralUI.md` | New Notification Outbox page. |
|
||||
| Central–Site Communication | New `NotificationSubmit` + ack message pair. |
|
||||
| Configuration Database / Commons | `Notifications` table, entity POCO, repository interface + implementation, EF migration, message contracts. |
|
||||
| `README.md` | Component table 20 → 21. |
|
||||
| `CLAUDE.md` | Component list 20 → 21; new key design decisions. |
|
||||
|
||||
## Refinement decisions (2026-05-18)
|
||||
|
||||
- **Site→central forward retry config** — the fixed forward-retry interval lives in the host
|
||||
`appsettings.json` (infrastructure config, not a deployed artifact).
|
||||
- **`Notify.Status` payload** — returns a status record: status, retry count, last error,
|
||||
and key timestamps (enqueued, delivered).
|
||||
- **Stuck threshold default** — 10 minutes, configurable.
|
||||
- **Pre-ingest status** — a distinct site-local `Forwarding` state; the site answers
|
||||
`Notify.Status` from its own S&F buffer without a round-trip to central.
|
||||
- **Site-side diagnostics** — Site Event Logging records site→central **forward failures**
|
||||
and long-buffered notifications only, not routine enqueue/forward success events.
|
||||
- **KPI history** — point-in-time only, computed on demand from the `Notifications` table;
|
||||
the ~1-year row retention answers historical questions directly, so no separate
|
||||
time-series store is added.
|
||||
|
||||
## Open questions
|
||||
|
||||
None outstanding — the basic design is fully specified. The next step is an implementation
|
||||
plan against the cross-document impact table.
|
||||
@@ -65,7 +65,8 @@ Central cluster only. Sites have no user interface.
|
||||
|
||||
### Notification List Management (Design Role)
|
||||
- Create, edit, and delete notification lists.
|
||||
- Manage recipients (name + email) within each list.
|
||||
- Each notification list has a **`Type`** — `Email` now, with `Teams` and other types planned. The type determines the type-specific targets a list carries.
|
||||
- Manage recipients (name + email) within each `Email` list.
|
||||
- Configure SMTP settings.
|
||||
|
||||
### Site & Data Connection Management (Admin Role)
|
||||
@@ -97,7 +98,7 @@ Central cluster only. Sites have no user interface.
|
||||
- Track deployment status (pending, in-progress, success, failed).
|
||||
|
||||
### System-Wide Artifact Deployment (Deployment Role)
|
||||
- Explicitly deploy shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, and SMTP configuration to all sites or to an individual site.
|
||||
- Explicitly deploy shared scripts, external system definitions, database connection definitions, and data connection definitions to all sites or to an individual site. (Notification lists and SMTP configuration are central-only and are not deployed.)
|
||||
- **Per-site deployment**: A "Deploy Artifacts" button on the Sites admin page allows deploying all artifacts to an individual site.
|
||||
- **Deploy all**: A bulk action deploys artifacts to all sites at once.
|
||||
- This is a **separate action** from instance deployment — system-wide artifacts are not automatically pushed when definitions change.
|
||||
@@ -114,13 +115,29 @@ Central cluster only. Sites have no user interface.
|
||||
- Subscribe-on-demand — stream starts when opened, stops when closed.
|
||||
|
||||
### Parked Message Management (Deployment Role)
|
||||
- Query sites for parked messages (external system calls, notifications, cached DB writes).
|
||||
- Query sites for parked messages (external system calls, cached DB writes). (Parked notifications are managed centrally on the Notification Outbox page, not here.)
|
||||
- View message details (target, payload, retry count, timestamps).
|
||||
- Retry or discard individual parked messages.
|
||||
|
||||
### Notification Outbox (Deployment Role)
|
||||
- Monitor and manage centrally-delivered notifications. The Notification Outbox dispatches every notification store-and-forwarded from sites and logs each one to the central `Notifications` table.
|
||||
- **KPI tiles** at the top of the page: queue depth (`Pending` + `Retrying`), stuck count, parked count, delivered in the last interval, and oldest pending age. The KPIs are central-computed on demand from the `Notifications` table.
|
||||
- A **queryable notification list** filterable by status, type, source site, notification list, and time range, with a **stuck-only toggle** and keyword search on subject. Each row shows the notification's status, retry count, last error, and key timestamps.
|
||||
- **Retry** and **Discard** actions are available on parked notifications: Retry returns the notification to `Pending` and resets `RetryCount` / `NextAttemptAt`; Discard moves it to `Discarded`. The row is retained either way so the table stays a complete audit record.
|
||||
- **Stuck rows are visually badged** — a notification is stuck if it is `Pending` or `Retrying` and older than the configurable stuck-age threshold. Stuck detection is display-only; there is no automated escalation or alerting.
|
||||
- All queries are served from the central `Notifications` table — no remote per-site queries are needed, unlike the Parked Message Management page.
|
||||
|
||||
### Site Calls (Deployment Role)
|
||||
- Monitor cached calls store-and-forwarded from sites — `ExternalSystem.CachedCall()` and `Database.CachedWrite()` operations. Scoped to the `ExternalCall` and `DatabaseWrite` kinds only; notifications keep their separate Notification Outbox page and are not merged here.
|
||||
- A **queryable cached-call list** filterable by site, kind, status, and time range. Each row shows the call's timestamp, site, kind, target summary, status badge, retry count, and last error.
|
||||
- **Retry** and **Discard** actions are available on `Parked` rows only — `Failed` rows are not actionable, since a permanent failure would simply fail again and its error was already returned synchronously to the calling script. The actions issue central→site commands to the owning site; if the site is offline the UI surfaces a "site unreachable" message.
|
||||
- Data is served from the central Site Call Audit component's `SiteCalls` table. The page is **read-mostly** — an eventually-consistent mirror of site state; the site remains the source of truth.
|
||||
|
||||
### Health Monitoring Dashboard (All Roles)
|
||||
- Overview of all sites with online/offline status.
|
||||
- Per-site detail: active/standby node status, data connection health, script error rates, alarm evaluation error rates, store-and-forward buffer depths.
|
||||
- Headline **Notification Outbox KPI tiles** — queue depth, stuck count, and parked count. These are central-computed by the Notification Outbox from the central `Notifications` table (not part of any site health report). The full outbox view is on the dedicated Notification Outbox page.
|
||||
- Headline **Site Call Audit KPI tiles** — buffered count, parked count, and failed-last-interval. These are central-computed by the Site Call Audit component from the central `SiteCalls` table (not part of any site health report). The full cached-call view is on the dedicated Site Calls page.
|
||||
|
||||
### Site Event Log Viewer (Deployment Role)
|
||||
- Query site event logs remotely.
|
||||
@@ -144,3 +161,5 @@ Central cluster only. Sites have no user interface.
|
||||
- **Security & Auth**: Authenticates users and enforces role-based access.
|
||||
- **Configuration Database**: All central data, including audit log data for the audit log viewer. Accessed via `ICentralUiRepository`.
|
||||
- **Health Monitoring**: Provides site health data for the dashboard.
|
||||
- **Notification Outbox**: Provides notification delivery KPIs and serves the `Notifications` table queries and Retry/Discard actions for the Notification Outbox page.
|
||||
- **Site Call Audit**: Serves the `SiteCalls` table queries and relays Retry/Discard actions to sites for the Site Calls page.
|
||||
|
||||
@@ -35,6 +35,9 @@ Commons must define shared primitive and utility types used across multiple comp
|
||||
- **`AlarmLevel` enum**: None, Low, LowLow, High, HighHigh. Severity level for an active alarm; always `None` for binary trigger types, set by `HiLo` triggers.
|
||||
- **`AlarmTriggerType` enum**: ValueMatch, RangeViolation, RateOfChange, HiLo.
|
||||
- **`ConnectionHealth` enum**: Connected, Disconnected, Connecting, Error.
|
||||
- **`TrackedOperationId`**: A GUID identifying a tracked store-and-forward operation (`ExternalSystem.CachedCall`, `Database.CachedWrite`, `Notify.Send`). Generated caller-side at the site at call time, returned to the script as a tracking handle, and reused as the idempotency key for telemetry sent to central. The notification domain's existing `NotificationId` is the notification-specific name for this same concept.
|
||||
- **`TrackedOperationKind` enum**: ExternalCall, DatabaseWrite. Discriminates the two cached-call kinds carried by a tracked operation (notifications are tracked separately via the `NotificationType` enum).
|
||||
- **`TrackedOperationStatus` enum**: Pending, Retrying, Delivered, Parked, Failed, Discarded. The unified lifecycle state shared by all tracked store-and-forward operations. This is the operation's externally-observable lifecycle status in the site-local tracking table (the status record); it is related to but distinct from the S&F buffer's own `StoreAndForwardMessageStatus`, which tracks a buffered message's retry state within the buffer (the retry mechanism). `Failed` (permanent failure) has no notification analogue — notifications use only the other five states (the `NotificationStatus` enum omits `Failed`).
|
||||
|
||||
Types defined here must be immutable and thread-safe.
|
||||
|
||||
@@ -65,12 +68,14 @@ Entity classes are organized by domain area:
|
||||
- **Shared Scripts**: `SharedScript`.
|
||||
- **Sites & Data Connections**: `Site`, `DataConnection`.
|
||||
- **External Systems & Database Connections**: `ExternalSystemDefinition`, `ExternalSystemMethod`, `DatabaseConnectionDefinition`.
|
||||
- **Notifications**: `NotificationList`, `NotificationRecipient`, `SmtpConfiguration`.
|
||||
- **Notifications**: `NotificationList` (carries a `Type` field — `NotificationType` enum — selecting the list's notification type and its type-specific targets), `NotificationRecipient`, `SmtpConfiguration`, `Notification` (the durable central-queue row — see below).
|
||||
- **Inbound API**: `ApiKey`, `ApiMethod`.
|
||||
- **Security**: `LdapGroupMapping`, `SiteScopeRule`.
|
||||
- **Deployment**: `DeploymentRecord`, `SystemArtifactDeploymentRecord`, `DeployedConfigSnapshot`.
|
||||
- **Audit**: `AuditLogEntry`.
|
||||
|
||||
The **`Notification`** entity is the persistence-ignorant POCO for a row of the central `Notifications` table — the durable notification queue owned by the Notification Outbox. It is a plain class with properties for `NotificationId` (GUID, the idempotency key), `Type` (`NotificationType` enum discriminator), `ListName`, `Subject`, `Body`, `TypeData` (a JSON string — the type-agnostic extensibility hook), `Status` (`NotificationStatus` enum), `RetryCount`, `LastError`, `ResolvedTargets`, the provenance fields `SourceSiteId` / `SourceInstanceId` / `SourceScript`, and the UTC timestamps `SiteEnqueuedAt`, `CreatedAt`, `LastAttemptAt`, `NextAttemptAt`, `DeliveredAt`. As with every entity class it has no EF dependency; the Configuration Database component supplies the Fluent API mapping, value conversions, and indexes. The `Type` and `Status` enums (`NotificationType`: `Email`, `Teams`, …; `NotificationStatus`: `Pending`, `Retrying`, `Delivered`, `Parked`, `Discarded`) are defined under `Types/Enums/` per REQ-COM-1.
|
||||
|
||||
### REQ-COM-4: Per-Component Repository Interfaces
|
||||
|
||||
Commons must define repository interfaces that consuming components use for data access. Each interface is tailored to the data needs of its consuming component:
|
||||
@@ -80,7 +85,9 @@ Commons must define repository interfaces that consuming components use for data
|
||||
- `ISecurityRepository` — LDAP group mappings, site scoping rules.
|
||||
- `IInboundApiRepository` — API keys, API method definitions.
|
||||
- `IExternalSystemRepository` — External system definitions, method definitions, database connection definitions.
|
||||
- `INotificationRepository` — Notification lists, recipients, SMTP configuration.
|
||||
- `INotificationRepository` — Notification lists (including the `Type` field), recipients, SMTP configuration.
|
||||
- `INotificationOutboxRepository` — The `Notifications` table: insert-if-not-exists ingest on `NotificationId`, due-row polling (`Pending` rows and `Retrying` rows past `NextAttemptAt`), status transitions, KPI aggregate queries, and the bulk delete of terminal rows used by the daily purge job.
|
||||
- `ISiteCallAuditRepository` — The `SiteCalls` table: insert-if-not-exists ingest on `TrackedOperationId`, upsert-on-newer-status from telemetry and reconciliation pulls, KPI aggregate queries, and the bulk delete of terminal rows used by the daily purge job.
|
||||
- `ISiteRepository` — Sites, data connections, and their site assignments.
|
||||
- `ICentralUiRepository` — Read-oriented queries spanning multiple domain areas for display purposes.
|
||||
|
||||
@@ -115,6 +122,9 @@ Commons must define the shared DTOs and message contracts used for inter-compone
|
||||
- **Debug View DTOs**: Subscribe/unsubscribe requests, one-shot snapshot request (`DebugSnapshotRequest`), initial snapshot, stream filter criteria.
|
||||
- **Script Execution DTOs**: Script call requests (with recursion depth), return values, error results.
|
||||
- **System-Wide Artifact DTOs**: Shared script packages, external system definitions, database connection definitions, notification list definitions.
|
||||
- **Notification DTOs**: `NotificationSubmit` (site→central submission: `NotificationId`, `ListName`, `Subject`, `Body`, provenance, `SiteEnqueuedAt`) and `NotificationSubmitAck` (central acknowledgement returned only after the `Notifications` row is persisted — ack-after-persist — which the site Store-and-Forward Engine waits on before clearing the buffered message). `NotificationStatusQuery` / `NotificationStatusResponse` back the `Notify.Status` script API, round-tripping a status record (status, retry count, last error, key timestamps) once a notification has been forwarded. Recipient resolution is *not* part of any contract — the site forwards only `(listName, subject, body)` and central resolves the list at delivery time. These contracts are subject to the additive-only evolution rules in REQ-COM-5a, since a submission can cross the site→central version-skew boundary.
|
||||
- **Cached Call Tracking DTOs**: `CachedCallTelemetry` (site→central lifecycle telemetry for a tracked cached call: `TrackedOperationId`, source site, `Kind` — the `TrackedOperationKind` enum (`ExternalCall` / `DatabaseWrite`) — target summary, status, retry count, last error, key timestamps, and source instance / script provenance) and `CachedCallReconcileRequest` / `CachedCallReconcileResponse` (cursor-based per-site pull of tracking rows changed since a cursor, used so missed telemetry self-heals). All three live in the `Integration/` message folder and are subject to the additive-only evolution rules in REQ-COM-5a, since they cross the site→central version-skew boundary.
|
||||
- **Parked Operation Command DTOs**: `RetryParkedOperation` and `DiscardParkedOperation` (central→site command/control messages keyed by `TrackedOperationId`, instructing the owning site to retry or discard a parked store-and-forward operation). These generalize the existing parked-message retry/discard commands to also cover parked cached calls; they live in the `RemoteQuery/` message folder alongside the other parked-message management messages.
|
||||
|
||||
All message types must be `record` types or immutable classes suitable for use as Akka.NET messages (though Commons itself must not depend on Akka.NET).
|
||||
|
||||
@@ -141,10 +151,13 @@ ScadaLink.Commons/
|
||||
│ ├── StaleTagMonitor.cs # heartbeat staleness watchdog
|
||||
│ ├── ValueFormatter.cs # culture-invariant value-to-string helper
|
||||
│ ├── DynamicJsonElement.cs # dynamic JSON wrapper for scripts
|
||||
│ ├── TrackedOperationId.cs # tracked store-and-forward operation ID (GUID)
|
||||
│ ├── Enums/ # InstanceState, DeploymentStatus, AlarmState,
|
||||
│ │ # AlarmLevel, AlarmTriggerType, ConnectionHealth,
|
||||
│ │ # DataType, StoreAndForwardCategory,
|
||||
│ │ # StoreAndForwardMessageStatus
|
||||
│ │ # StoreAndForwardMessageStatus,
|
||||
│ │ # NotificationType, NotificationStatus,
|
||||
│ │ # TrackedOperationKind, TrackedOperationStatus
|
||||
│ ├── DataConnections/ # OPC UA endpoint config value objects + enums
|
||||
│ ├── Flattening/ # FlattenedConfiguration, ConfigurationDiff,
|
||||
│ │ # DeploymentPackage, ValidationResult
|
||||
@@ -158,6 +171,8 @@ ScadaLink.Commons/
|
||||
│ │ ├── IInboundApiRepository.cs
|
||||
│ │ ├── IExternalSystemRepository.cs
|
||||
│ │ ├── INotificationRepository.cs
|
||||
│ │ ├── INotificationOutboxRepository.cs
|
||||
│ │ ├── ISiteCallAuditRepository.cs
|
||||
│ │ ├── ISiteRepository.cs
|
||||
│ │ └── ICentralUiRepository.cs
|
||||
│ └── Services/ # REQ-COM-4a: Cross-cutting service interfaces
|
||||
@@ -174,7 +189,8 @@ ScadaLink.Commons/
|
||||
│ ├── Sites/ # Site, DataConnection
|
||||
│ ├── ExternalSystems/ # ExternalSystemDefinition, ExternalSystemMethod,
|
||||
│ │ # DatabaseConnectionDefinition
|
||||
│ ├── Notifications/ # NotificationList, NotificationRecipient, SmtpConfiguration
|
||||
│ ├── Notifications/ # NotificationList, NotificationRecipient, SmtpConfiguration,
|
||||
│ │ # Notification (central Notifications-table row)
|
||||
│ ├── InboundApi/ # ApiKey, ApiMethod
|
||||
│ ├── Security/ # LdapGroupMapping, SiteScopeRule
|
||||
│ ├── Deployment/ # DeploymentRecord, SystemArtifactDeploymentRecord,
|
||||
@@ -192,9 +208,13 @@ ScadaLink.Commons/
|
||||
│ ├── Artifacts/
|
||||
│ ├── DataConnection/ # data-connection subscribe/write/health messages
|
||||
│ ├── Instance/ # attribute get/set request/command messages
|
||||
│ ├── Integration/ # external-integration call request/response
|
||||
│ ├── Integration/ # external-integration call request/response,
|
||||
│ │ # cached-call tracking telemetry + reconcile
|
||||
│ ├── Notification/ # NotificationSubmit + ack,
|
||||
│ │ # NotificationStatusQuery/Response
|
||||
│ ├── InboundApi/ # Route.To() request messages
|
||||
│ ├── RemoteQuery/ # event-log and parked-message query messages
|
||||
│ ├── RemoteQuery/ # event-log and parked-message query messages,
|
||||
│ │ # parked-operation retry/discard commands
|
||||
│ └── Management/ # HTTP/ClusterClient management commands + registry
|
||||
├── Serialization/ # OpcUaEndpointConfigSerializer (typed↔legacy JSON)
|
||||
└── Validators/ # OpcUaEndpointConfigValidator
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
## Purpose
|
||||
|
||||
The Communication component manages all messaging between the central cluster and site clusters. It provides the transport layer for deployments, instance lifecycle commands, integration routing, debug streaming, health reporting, and remote queries (parked messages, event logs). Two transports are used: **Akka.NET ClusterClient** for command/control messaging and **gRPC server-streaming** for real-time data (attribute values, alarm states).
|
||||
The Communication component manages all messaging between the central cluster and site clusters. It provides the transport layer for deployments, instance lifecycle commands, integration routing, debug streaming, health reporting, notification submission, and remote queries (parked messages, event logs). Two transports are used: **Akka.NET ClusterClient** for command/control messaging and **gRPC server-streaming** for real-time data (attribute values, alarm states).
|
||||
|
||||
## Location
|
||||
|
||||
@@ -36,7 +36,7 @@ Both central and site clusters. Each side has communication actors that handle m
|
||||
|
||||
### 3. System-Wide Artifact Deployment (Central → Site(s))
|
||||
- **Pattern**: Broadcast with per-site acknowledgment (deploy to all sites), or targeted to a single site (per-site deployment).
|
||||
- When shared scripts, external system definitions, database connections, data connections, notification lists, or SMTP configuration are explicitly deployed, central sends them to the target site(s).
|
||||
- When shared scripts, external system definitions, database connections, or data connections are explicitly deployed, central sends them to the target site(s). (Notification lists and SMTP configuration are central-only and are not deployed to sites.)
|
||||
- Each site acknowledges receipt and reports success/failure independently.
|
||||
- **Shared script deployment triggers immediate recompilation on the site** — the site's `SharedScriptLibrary` replaces its in-memory compiled code, making updated shared scripts available to all running instances without redeployment. Other artifact types (external systems, database connections, etc.) are stored but do not require recompilation.
|
||||
|
||||
@@ -122,7 +122,22 @@ Keepalive settings are configurable via `CommunicationOptions`:
|
||||
- Site event logs.
|
||||
- Instance debug snapshots (attribute values and alarm states).
|
||||
- Central can also send management commands:
|
||||
- Retry or discard parked messages.
|
||||
- Retry or discard parked messages and parked cached calls — central sends `RetryParkedOperation` / `DiscardParkedOperation` (keyed by `TrackedOperationId`) to the owning site, which applies the change to its S&F buffer and tracking table.
|
||||
|
||||
### 9. Notification Submission (Site → Central)
|
||||
- **Pattern**: Fire-and-forget with acknowledgment.
|
||||
- The site **Store-and-Forward Engine** sends a `NotificationSubmit` message to central carrying the notification — `NotificationId`, target list name, subject, body, and source provenance.
|
||||
- Central ingests the submission with an insert-if-not-exists on `NotificationId` and acknowledges **after the row is persisted** to the `Notifications` table in the central configuration database. The site S&F engine clears the buffered message only on that ack.
|
||||
- The `NotificationId` GUID — generated at the site — is the **idempotency key**. The handoff is at-least-once: a re-sent submission after a lost ack is harmless because central's insert-if-not-exists treats the duplicate as a no-op.
|
||||
- **Transport**: ClusterClient (site→central command/control), consistent with how other site→central messages are sent.
|
||||
|
||||
### 10. Cached Call Telemetry (Site → Central)
|
||||
- **Pattern**: Fire-and-forget telemetry with a periodic reconciliation pull.
|
||||
- The site **Store-and-Forward Engine** emits a `CachedCallTelemetry` message to central on **every** cached-call lifecycle transition (`Pending → Retrying → Delivered / Parked / Failed / Discarded`). The first telemetry event for an operation carries its initial status — `Pending` when a transient failure has buffered the call, or directly `Delivered`/`Failed` for a cached call that never buffers. The message carries the `TrackedOperationId`, source site, `Kind` (the `TrackedOperationKind` enum), target summary, status, retry count, last error, key timestamps, and source provenance.
|
||||
- Emission is **best-effort and at-least-once**, **idempotent on `TrackedOperationId`** — central's Site Call Audit component ingests with insert-if-not-exists then upsert-on-newer-status, so a re-sent or out-of-order event is harmless.
|
||||
- **Reconciliation pull**: because telemetry is best-effort, the central **Site Call Audit** component periodically — and on site reconnect — issues a `CachedCallReconcileRequest` to each site; the site replies with a `CachedCallReconcileResponse` carrying all tracking rows changed since a cursor. Any telemetry missed during a disconnect self-heals through this pull.
|
||||
- Central audit is an **eventually-consistent mirror** — the site's operation tracking table remains the source of truth for cached-call status (`Tracking.Status(id)` is always answered site-locally).
|
||||
- **Transport**: ClusterClient (site→central command/control), consistent with how other site→central messages are sent.
|
||||
|
||||
## Topology
|
||||
|
||||
@@ -174,6 +189,8 @@ Each request/response pattern has a default timeout that can be overridden in co
|
||||
| 4. Integration Routing | 30 seconds | External system waiting for response; Inbound API per-method timeout may cap this further |
|
||||
| 5. Recipe/Command Delivery | 30 seconds | Fire-and-forget with ack |
|
||||
| 8. Remote Queries | 30 seconds | Querying parked messages or event logs |
|
||||
| 9. Notification Submission | 30 seconds | Fire-and-forget with ack; central acks after persisting the row |
|
||||
| 10. Cached Call Telemetry | 30 seconds | Reconciliation pull is request/response; telemetry emission itself is fire-and-forget |
|
||||
|
||||
Timeouts use the Akka.NET **ask pattern**. If no response is received within the timeout, the caller receives a timeout failure.
|
||||
|
||||
@@ -229,6 +246,7 @@ The ManagementActor is registered at the well-known path `/user/management` on c
|
||||
- **Site Runtime**: Receives deployments, lifecycle commands, and artifact updates. Provides debug view data.
|
||||
- **Central UI**: Debug view requests and remote queries flow through communication.
|
||||
- **Health Monitoring**: Receives periodic health reports from sites.
|
||||
- **Store-and-Forward Engine (site)**: Parked message queries/commands are routed through communication.
|
||||
- **Store-and-Forward Engine (site)**: Parked message queries/commands are routed through communication. The engine also emits `CachedCallTelemetry`, answers `CachedCallReconcileRequest` pulls, and receives relayed `RetryParkedOperation` / `DiscardParkedOperation` commands.
|
||||
- **Site Call Audit (central)**: Receives cached-call telemetry and reconciliation responses; issues reconciliation pulls and relays parked-operation Retry/Discard commands to sites through communication.
|
||||
- **Site Event Logging**: Event log queries are routed through communication.
|
||||
- **Management Service**: The ManagementActor is registered with ClusterClientReceptionist on central nodes. The CLI communicates with the ManagementActor via ClusterClient, which is a separate channel from inter-cluster remoting.
|
||||
|
||||
@@ -52,9 +52,13 @@ The configuration database stores all central system data, organized by domain a
|
||||
- **Database Connection Definitions**: Named database connections (name, connection details, retry settings).
|
||||
|
||||
### Notifications
|
||||
- **Notification Lists**: List definitions (name).
|
||||
- **Notification Lists**: List definitions (name, `Type` discriminator — `Email` / `Teams` / … — selecting the list's notification type and type-specific targets).
|
||||
- **Notification Recipients**: Recipients per list (name, email address).
|
||||
- **SMTP Configuration**: Email server settings.
|
||||
- **Notifications**: The durable central notification queue owned by the Notification Outbox — one row per notification, the single source of audit truth. The schema is **type-agnostic** so it records any notification type the system supports (email today, Microsoft Teams and others later): a `Type` discriminator selects the type, and a `TypeData` JSON column (`nvarchar(max)`) carries any future per-type fields without a schema change. Columns: `NotificationId` (GUID, primary key — generated at the site, used as the idempotency key), `Type`, `ListName`, `Subject`, `Body`, `TypeData`, `Status`, `RetryCount`, `LastError`, `ResolvedTargets`, `SourceSiteId`, `SourceInstanceId`, `SourceScript`, `SiteEnqueuedAt`, `CreatedAt`, `LastAttemptAt`, `NextAttemptAt`, `DeliveredAt`. `Status` is a `NotificationStatus` enum stored with values `Pending`, `Retrying`, `Delivered`, `Parked`, `Discarded` (the site-local `Forwarding` state is never persisted centrally). Indexed on `Status` and `NextAttemptAt` for efficient dispatcher polling of due rows, and on `SourceSiteId` and `CreatedAt` for KPI computation and the Central UI query page. Terminal rows are removed by a daily purge job — see Scheduled Maintenance below. See Component-NotificationOutbox.md for the full lifecycle.
|
||||
|
||||
### Site Calls
|
||||
- **SiteCalls**: The central audit table for cached site calls — `ExternalSystem.CachedCall()` and `Database.CachedWrite()` — owned by the Site Call Audit component and a sibling of the `Notifications` table. One row per cached operation. Columns: `TrackedOperationId` (GUID, primary key — generated site-side at call time, used as the idempotency key), `SourceSite`, `Kind` (a `TrackedOperationKind` enum stored with values `ExternalCall` / `DatabaseWrite`), `TargetSummary` (external system + method for an `ExternalCall`, database connection name for a `DatabaseWrite`), `Status` (a `TrackedOperationStatus` enum stored with values `Pending`, `Retrying`, `Delivered`, `Parked`, `Failed`, `Discarded`), `RetryCount`, `LastError`, `Provenance` (source instance / script), `CreatedAtUtc`, `UpdatedAtUtc`, `TerminalAtUtc`. The table is populated **only** by Site Call Audit telemetry and reconciliation pulls — sites are the source of truth and the row is an eventually-consistent mirror, never written by a central dispatcher. Ingestion is **insert-if-not-exists** keyed on `TrackedOperationId`, then **upsert-on-newer-status**; the lifecycle is monotonic, so at-least-once and out-of-order telemetry are harmless. Indexed on `Status` and `SourceSite` for KPI computation and the Central UI query page. Terminal rows are removed by a daily purge job — see Scheduled Maintenance below. See Component-SiteCallAudit.md for the full lifecycle.
|
||||
|
||||
### Inbound API
|
||||
- **API Keys**: Key definitions (name/label, key value, enabled flag).
|
||||
@@ -94,7 +98,9 @@ Repository interfaces are defined in **Commons** alongside the POCO entity class
|
||||
| `ISecurityRepository` | Security & Auth | LDAP group mappings, site scoping rules |
|
||||
| `IInboundApiRepository` | Inbound API | API keys, API method definitions |
|
||||
| `IExternalSystemRepository` | External System Gateway | External system definitions, method definitions, database connection definitions |
|
||||
| `INotificationRepository` | Notification Service | Notification lists, recipients, SMTP configuration |
|
||||
| `INotificationRepository` | Notification Service | Notification lists (including the `Type` field), recipients, SMTP configuration |
|
||||
| `INotificationOutboxRepository` | Notification Outbox | The `Notifications` table — insert-if-not-exists ingest, due-row polling, status transitions, KPI aggregate queries, and bulk delete of terminal rows used by the daily purge job |
|
||||
| `ISiteCallAuditRepository` | Site Call Audit | The `SiteCalls` table — insert-if-not-exists ingest, upsert-on-newer-status, KPI aggregate queries, and bulk delete of terminal rows used by the daily purge job |
|
||||
| `IHealthMonitoringRepository` | Health Monitoring | (Minimal — health data is in-memory; repository needed only if connectivity history is persisted in the future) |
|
||||
| `ICentralUiRepository` | Central UI | Read-oriented queries spanning multiple domain areas for display purposes |
|
||||
|
||||
@@ -198,7 +204,7 @@ Since only the after-state is stored, change history for an entity is reconstruc
|
||||
| Alarms | Create, edit, delete alarm definitions |
|
||||
| Instances | Create, override values, bind connections, area assignment, disable, enable, delete |
|
||||
| Deployments | Deploy to instance (who, what, which instance, success/failure) |
|
||||
| System-Wide Artifact Deployments | Deploy shared scripts / external system definitions / DB connections / data connections / notification lists / SMTP config to site(s) (who, what, which site(s), result) |
|
||||
| System-Wide Artifact Deployments | Deploy shared scripts / external system definitions / DB connections / data connections to site(s) (who, what, which site(s), result) |
|
||||
| External Systems | Create, edit, delete definitions |
|
||||
| Database Connections | Create, edit, delete definitions |
|
||||
| Notification Lists | Create, edit, delete lists and recipients |
|
||||
@@ -226,6 +232,7 @@ Results are returned in reverse chronological order (most recent first) with pag
|
||||
|
||||
- Schema changes are managed via EF Core Migrations (`dotnet ef migrations add`, `dotnet ef migrations script`).
|
||||
- Each migration is a versioned, incremental schema change.
|
||||
- Each new table is introduced in its own migration — for example, the `Notifications` table for the Notification Outbox ships as a dedicated EF Core migration that creates the table, its `Type`/`Status` value conversions, and its dispatcher and KPI indexes.
|
||||
|
||||
### Development Environment
|
||||
- Migrations are **auto-applied** at application startup using `dbContext.Database.MigrateAsync()`.
|
||||
@@ -265,6 +272,18 @@ The Configuration Database supports seeding initial data required for the system
|
||||
|
||||
---
|
||||
|
||||
## Scheduled Maintenance
|
||||
|
||||
### Notifications Table Purge
|
||||
|
||||
The `Notifications` table grows one row per notification and is never trimmed by normal operation — `Discarded` rows are deliberately retained for audit. To bound table growth while preserving a strong audit trail, a **daily purge job** deletes terminal rows (`Delivered`, `Parked`, `Discarded`) older than a configurable retention window (default 365 days). Non-terminal rows (`Pending`, `Retrying`) are never purged. The purge is a bulk `DELETE` issued through `INotificationOutboxRepository`; it is owned and scheduled by the Notification Outbox component (see Component-NotificationOutbox.md), which supplies the retention window from `NotificationOutboxOptions`. The Configuration Database component provides only the repository operation and the table.
|
||||
|
||||
### SiteCalls Table Purge
|
||||
|
||||
The `SiteCalls` table grows one row per cached site call and is never trimmed by normal operation. To bound table growth while preserving a strong audit trail, a **daily purge job** deletes terminal rows (`Delivered`, `Failed`, `Discarded`) older than a configurable retention window (default 365 days). Non-terminal rows (`Pending`, `Retrying`, `Parked`) are never purged. The purge is a bulk `DELETE` issued through `ISiteCallAuditRepository`; it is owned and scheduled by the Site Call Audit component (see Component-SiteCallAudit.md), which supplies the retention window. The Configuration Database component provides only the repository operation and the table.
|
||||
|
||||
---
|
||||
|
||||
## Connection Management
|
||||
|
||||
- Connection strings are provided via the Host's `DatabaseConfiguration` options (bound from `appsettings.json`).
|
||||
@@ -289,6 +308,8 @@ The Configuration Database supports seeding initial data required for the system
|
||||
- **Inbound API**: Uses `IInboundApiRepository` for API keys and method definitions.
|
||||
- **External System Gateway**: Uses `IExternalSystemRepository` for external system and database connection definitions.
|
||||
- **Notification Service**: Uses `INotificationRepository` for notification lists and SMTP configuration.
|
||||
- **Notification Outbox**: Uses `INotificationOutboxRepository` for all access to the `Notifications` table — ingest, dispatch polling, status updates, KPI queries, and the daily purge of terminal rows.
|
||||
- **Site Call Audit**: Uses `ISiteCallAuditRepository` for all access to the `SiteCalls` table — telemetry/reconciliation ingest, KPI queries, and the daily purge of terminal rows.
|
||||
- **Central UI**: Uses `ICentralUiRepository` for read-oriented queries across domain areas, including audit log queries for the audit log viewer.
|
||||
- **All central components that modify state**: Call `IAuditService.LogAsync()` after successful operations to record audit entries within the same transaction.
|
||||
- **Host**: Provides database connection configuration. Registers DbContext, repository implementations, and `IAuditService` implementation in the DI container. Triggers auto-migration in development or validates schema version in production.
|
||||
|
||||
@@ -17,7 +17,7 @@ Central cluster only. The site-side deployment responsibilities (receiving confi
|
||||
- Track deployment status (pending, in-progress, success, failed).
|
||||
- Handle deployment failures gracefully — if a site is unreachable or the deployment fails, report the failure. No retry or buffering at central.
|
||||
- If a central failover occurs during deployment, the deployment is treated as failed and must be re-initiated.
|
||||
- Deploy system-wide artifacts (shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, SMTP configuration) to all sites or to an individual site on explicit request.
|
||||
- Deploy system-wide artifacts (shared scripts, external system definitions, database connection definitions, data connection definitions) to all sites or to an individual site on explicit request.
|
||||
- Send instance lifecycle commands (disable, enable, delete) to sites via the Communication Layer.
|
||||
|
||||
## Deployment Flow
|
||||
@@ -106,30 +106,14 @@ A deployment to a site includes the flattened instance configuration plus any sy
|
||||
- External system definitions
|
||||
- Database connection definitions
|
||||
- Data connection definitions
|
||||
- Notification lists
|
||||
- SMTP configuration
|
||||
|
||||
System-wide artifact deployment is a **separate action** from instance deployment, triggered explicitly by a user with the Deployment role. Artifacts can be deployed to all sites at once or to an individual site (per-site deployment via the Sites admin page).
|
||||
|
||||
### Secret handling in artifacts
|
||||
|
||||
The SMTP configuration artifact carries the SMTP credential (password or OAuth2
|
||||
client secret). This is a **conscious, accepted design decision**: SMTP
|
||||
configuration is a deployable artifact, so the credential is distributed to
|
||||
sites that need it. The credential is protected by the following controls:
|
||||
|
||||
- **In transit** — artifact-deployment commands travel over the inter-cluster
|
||||
transport, which is TLS-protected (see Cluster Infrastructure / Communication).
|
||||
- **Not logged** — the Deployment Manager never writes credential values to
|
||||
logs; deployment log statements reference only site IDs/names, the deployment
|
||||
ID, and exception messages.
|
||||
- **At rest on the site** — the credential is stored in the site's local SQLite
|
||||
artifact store. At-rest encryption of that field is **not** currently applied;
|
||||
it is treated as acceptable given the TLS-protected transport, the absence of
|
||||
any logging leak, and the trust boundary of the site host. Encrypting the
|
||||
credential field within the artifact payload would require a key-management
|
||||
scheme (key location and distribution to sites) and is recorded here as a
|
||||
possible future hardening item, not a current requirement.
|
||||
Notification lists and SMTP configuration are **not** deployable artifacts — they
|
||||
are central-only definitions managed by the Notification Service (see
|
||||
Component-NotificationService.md). Notification delivery happens on the central
|
||||
cluster, so no notification artifact or SMTP credential is ever distributed to
|
||||
sites.
|
||||
|
||||
## Site-Side Apply Atomicity
|
||||
|
||||
|
||||
@@ -59,10 +59,11 @@ Each database connection definition includes:
|
||||
- Failures are immediate — no buffering.
|
||||
|
||||
### Cached Write (Store-and-Forward)
|
||||
- Script calls `Database.CachedWrite("name", "sql", parameters)`.
|
||||
- The write is submitted to the Store-and-Forward Engine.
|
||||
- Script calls `Database.CachedWrite("name", "sql", parameters)`. This is **deferred delivery**: the call returns a `TrackedOperationId` tracking handle immediately rather than the write result.
|
||||
- Payload includes: connection name, SQL statement, serialized parameter values.
|
||||
- If the database is unavailable, the write is buffered and retried per the connection's retry settings.
|
||||
- The write is attempted immediately. On immediate success it is recorded as a terminal `Delivered` tracking record. On **transient failure** (database unavailable) it is buffered (`Pending`/`Retrying`) and retried per the connection's retry settings by the Store-and-Forward Engine.
|
||||
- On **permanent failure** (e.g. a SQL syntax or constraint error — a request that will never succeed), the error is returned **synchronously** to the calling script and the write is **not** buffered. The call is also recorded as a terminal `Failed` tracking record capturing the error.
|
||||
- Cached-write status is observable to scripts via `Tracking.Status(id)` (answered site-locally and authoritatively) and centrally via the Site Call Audit component.
|
||||
|
||||
## Invocation Protocol
|
||||
|
||||
@@ -84,10 +85,11 @@ Scripts choose between two call modes per invocation, mirroring the dual-mode da
|
||||
- Use for request/response interactions where the script needs the result (e.g., fetching a recipe, querying inventory).
|
||||
|
||||
### Cached (Store-and-Forward)
|
||||
- Script calls `ExternalSystem.CachedCall("systemName", "methodName", params)`.
|
||||
- The call is attempted immediately. If it succeeds, the response is discarded (fire-and-forget).
|
||||
- On **transient failure** (connection refused, timeout, HTTP 5xx), the call is routed to the Store-and-Forward Engine for retry per the system's retry settings. The script does **not** block — the call is buffered and the script continues.
|
||||
- On **permanent failure** (HTTP 4xx), the error is returned **synchronously** to the calling script. No retry — the request itself is wrong.
|
||||
- Script calls `ExternalSystem.CachedCall("systemName", "methodName", params)`. This is **deferred delivery**: the call returns a `TrackedOperationId` tracking handle immediately rather than the response body.
|
||||
- The call is attempted immediately. If it succeeds, the response is discarded and the call is recorded as a terminal `Delivered` tracking record.
|
||||
- On **transient failure** (connection refused, timeout, HTTP 408, 429, 5xx), the call is routed to the Store-and-Forward Engine for retry per the system's retry settings. The script does **not** block — the call is buffered (`Pending`/`Retrying`) and the script continues.
|
||||
- On **permanent failure** (HTTP 4xx except 408/429), the error is returned **synchronously** to the calling script. No retry — the request itself is wrong. The call is also recorded as a terminal `Failed` tracking record capturing the error.
|
||||
- Cached-call status is observable to scripts via `Tracking.Status(id)` (answered site-locally and authoritatively) and centrally via the Site Call Audit component.
|
||||
- Use for outbound data pushes where deferred delivery is acceptable (e.g., posting production data, sending quality reports).
|
||||
|
||||
## Call Timeout & Error Handling
|
||||
@@ -95,7 +97,7 @@ Scripts choose between two call modes per invocation, mirroring the dual-mode da
|
||||
- Each external system definition specifies a **timeout** that applies to all method calls on that system.
|
||||
- Error classification by HTTP response:
|
||||
- **Transient failures** (connection refused, timeout, HTTP 408, 429, 5xx): Behavior depends on call mode — `CachedCall` buffers for retry; `Call` returns error to script.
|
||||
- **Permanent failures** (HTTP 4xx except 408/429): Always returned to the calling script regardless of call mode. Logged to Site Event Logging.
|
||||
- **Permanent failures** (HTTP 4xx except 408/429): Always returned to the calling script regardless of call mode. Logged to Site Event Logging. For `CachedCall`, the failure is additionally recorded as a terminal `Failed` tracking record — so even a never-buffered cached call has an authoritative status record.
|
||||
- This classification ensures the S&F buffer is not polluted with requests that will never succeed.
|
||||
- **Idempotency note**: `CachedCall` retries may result in duplicate delivery if the external system received the original request but the response was lost. Callers should use `CachedCall` only for operations that are idempotent or where duplicate delivery is acceptable.
|
||||
|
||||
@@ -114,7 +116,8 @@ Scripts choose between two call modes per invocation, mirroring the dual-mode da
|
||||
|
||||
- **Configuration Database (MS SQL)**: Stores external system and database connection definitions (central only).
|
||||
- **Local SQLite**: At sites, external system and database connection definitions are read from local SQLite (populated by artifact deployment). Sites do not access the central config DB.
|
||||
- **Store-and-Forward Engine**: Handles buffering for failed external system calls and cached database writes.
|
||||
- **Store-and-Forward Engine**: Handles buffering for failed external system calls and cached database writes, and owns the site-local operation tracking table read by `Tracking.Status(id)`.
|
||||
- **Site Call Audit**: Central audit mirror for cached calls — receives cached-call lifecycle telemetry so `CachedCall`/`CachedWrite` status is observable centrally.
|
||||
- **Communication Layer**: Routes inbound external system requests from central to sites.
|
||||
- **Security & Auth**: Design role manages definitions.
|
||||
- **Configuration Database (via IAuditService)**: Definition changes are audit logged.
|
||||
@@ -122,5 +125,6 @@ Scripts choose between two call modes per invocation, mirroring the dual-mode da
|
||||
## Interactions
|
||||
|
||||
- **Site Runtime (Script/Alarm Execution Actors)**: Scripts invoke external system methods and database operations through this component.
|
||||
- **Store-and-Forward Engine**: Failed calls and cached writes are routed here for reliable delivery.
|
||||
- **Store-and-Forward Engine**: Failed calls and cached writes are routed here for reliable delivery; it also assigns each cached call a `TrackedOperationId` tracking row.
|
||||
- **Site Call Audit**: The central observability sibling for cached calls — cached-call status reported here is queried via the Central UI Site Calls page.
|
||||
- **Deployment Manager**: Receives updated definitions as part of system-wide artifact deployment (triggered explicitly by Deployment role).
|
||||
|
||||
@@ -29,8 +29,11 @@ Site clusters (metric collection and reporting). Central cluster (aggregation an
|
||||
| Tag resolution counts | Data Connection Layer | Per connection: total subscribed tags vs. successfully resolved tags |
|
||||
| Script error rates | Site Runtime (Script Actors) | Frequency of script failures |
|
||||
| Alarm evaluation error rates | Site Runtime (Alarm Actors) | Frequency of alarm evaluation failures |
|
||||
| Store-and-forward buffer depth | Store-and-Forward Engine | Pending messages by category (external, notification, DB write) |
|
||||
| Store-and-forward buffer depth | Store-and-Forward Engine | Pending messages by category — external, notification (notifications awaiting forward to central), DB write |
|
||||
| Dead letter count | Akka.NET EventStream | Messages sent to actors that no longer exist — indicates stale references or timing issues |
|
||||
| Notification Outbox queue depth | Notification Outbox (central) | Count of `Pending` + `Retrying` notifications — central-computed, not site-reported |
|
||||
| Notification Outbox stuck count | Notification Outbox (central) | Count of `Pending` / `Retrying` notifications older than the configurable stuck-age threshold — central-computed, not site-reported |
|
||||
| Notification Outbox parked count | Notification Outbox (central) | Count of `Parked` notifications — central-computed, not site-reported |
|
||||
|
||||
## Reporting Protocol
|
||||
|
||||
@@ -50,10 +53,34 @@ Script error rates and alarm evaluation error rates are calculated as **raw coun
|
||||
- **Alarm evaluation errors** include all failures during alarm condition evaluation.
|
||||
- For detailed diagnostics (error types, stack traces, affected instances), operators use the **Site Event Log Viewer** — the health dashboard is for quick triage, not forensics.
|
||||
|
||||
## Notification Outbox KPIs
|
||||
|
||||
The Notification Outbox is a **central** component, so its KPIs are **central-computed** rather than collected from sites and carried in the site health report:
|
||||
|
||||
- The dashboard surfaces three **headline** outbox KPIs: **queue depth** (`Pending` + `Retrying`), **stuck count** (`Pending` / `Retrying` rows older than the configurable stuck-age threshold), and **parked count** (`Parked`).
|
||||
- The Notification Outbox component computes these on demand from the central `Notifications` table; the health dashboard polls it for the headline tiles.
|
||||
- The fuller KPI set — which also includes **delivered (last interval)** and **oldest pending age** — lives on the Central UI **Notification Outbox** page, not the health dashboard.
|
||||
- Outbox KPIs are **point-in-time**, computed on demand from the `Notifications` table. There is no time-series store — consistent with Health Monitoring's "current status only" philosophy. The outbox's own ~1-year row retention answers historical questions directly.
|
||||
|
||||
These are distinct from the site-reported **Store-and-forward buffer depth** notification metric, which now covers the **site→central leg** — notifications still buffered in a site's Store-and-Forward Engine awaiting forward to central — and remains part of the site health report.
|
||||
|
||||
## Site Call Audit KPIs
|
||||
|
||||
The Site Call Audit is a **central** component, so its KPIs — like the Notification Outbox's — are **central-computed** rather than collected from sites and carried in the site health report:
|
||||
|
||||
- The dashboard surfaces Site Call Audit **headline** KPI tiles alongside the existing Notification Outbox tiles.
|
||||
- The Site Call Audit component computes these on demand from the central `SiteCalls` table, **global and per-source-site**; the health dashboard polls it for the headline tiles.
|
||||
- The KPI set is **buffered count** (`Pending` + `Retrying`), **parked count** (`Parked`), **failed (last interval)**, **delivered (last interval)**, **oldest pending age**, and **stuck count** (`Pending` / `Retrying` rows older than the configurable stuck-age threshold).
|
||||
- **Stuck** is `Pending` / `Retrying` rows older than a configurable threshold (default **10 minutes**) — **display-only** (KPI count plus a row badge), with no escalation or alerting, consistent with the Notification Outbox stuck metric.
|
||||
- Site Call Audit KPIs are **point-in-time**, computed on demand from the `SiteCalls` table. There is no time-series store — consistent with Health Monitoring's "current status only" philosophy.
|
||||
|
||||
Unlike the Notification Outbox, the Site Call Audit is **not a dispatcher** — cached calls are delivered by each site's Store-and-Forward Engine, and the `SiteCalls` table is an eventually-consistent central mirror of site-owned status.
|
||||
|
||||
## Central Storage
|
||||
|
||||
- Health metrics are held **in memory** at the central cluster for display in the UI.
|
||||
- No historical health data is persisted — the dashboard shows current/latest status only.
|
||||
- Notification Outbox and Site Call Audit KPIs are not stored by Health Monitoring; they are computed point-in-time from the central `Notifications` and `SiteCalls` tables respectively each time the dashboard refreshes — consistent with the current-status-only philosophy.
|
||||
- Site connectivity history (online/offline transitions) may optionally be logged via the Audit Log or a separate mechanism if needed in the future.
|
||||
|
||||
## No Alerting
|
||||
@@ -66,8 +93,10 @@ Script error rates and alarm evaluation error rates are calculated as **raw coun
|
||||
- **Communication Layer**: Transports health reports from sites to central.
|
||||
- **Data Connection Layer (site)**: Provides connection health metrics.
|
||||
- **Site Runtime (site)**: Provides script error rate and alarm evaluation error rate metrics.
|
||||
- **Store-and-Forward Engine (site)**: Provides buffer depth metrics.
|
||||
- **Store-and-Forward Engine (site)**: Provides buffer depth metrics, including the notification backlog awaiting forward to central.
|
||||
- **Cluster Infrastructure (site)**: Provides node role status.
|
||||
- **Notification Outbox (central)**: Provides central-computed outbox KPIs — queue depth, stuck count, parked count — for the headline dashboard tiles.
|
||||
- **Site Call Audit (central)**: Provides central-computed cached-call KPIs — buffered count, parked count, failed/delivered (last interval), oldest pending age, stuck count — for the headline dashboard tiles.
|
||||
|
||||
## Interactions
|
||||
|
||||
|
||||
@@ -31,8 +31,8 @@ The same compiled binary must be deployable to both central and site nodes. The
|
||||
|
||||
At startup the Host must inspect the configured node role and register only the component services appropriate for that role:
|
||||
|
||||
- **Shared** (both Central and Site): ClusterInfrastructure, Communication, HealthMonitoring, ExternalSystemGateway, NotificationService.
|
||||
- **Central only**: TemplateEngine, DeploymentManager, Security, AuditLogging, CentralUI, InboundAPI, ManagementService.
|
||||
- **Shared** (both Central and Site): ClusterInfrastructure, Communication, HealthMonitoring, ExternalSystemGateway.
|
||||
- **Central only**: TemplateEngine, DeploymentManager, Security, AuditLogging, CentralUI, InboundAPI, ManagementService, NotificationService, NotificationOutbox, SiteCallAudit.
|
||||
- **Site only**: SiteRuntime, DataConnectionLayer, StoreAndForward, SiteEventLogging.
|
||||
|
||||
Components not applicable to the current role must not be registered in the DI container or the Akka.NET actor system.
|
||||
@@ -60,7 +60,9 @@ The Host must bind configuration sections from `appsettings.json` to strongly-ty
|
||||
| `ScadaLink:Communication` | `CommunicationOptions` | Communication | DeploymentTimeout, LifecycleTimeout, QueryTimeout, TransportHeartbeatInterval, TransportFailureThreshold |
|
||||
| `ScadaLink:Security` | `SecurityOptions` | Security & Auth | LdapServer, LdapPort, LdapUseTls, JwtSigningKey, JwtExpiryMinutes, IdleTimeoutMinutes |
|
||||
| `ScadaLink:InboundApi` | `InboundApiOptions` | Inbound API | DefaultMethodTimeout |
|
||||
| `ScadaLink:Notification` | `NotificationOptions` | Notification Service | (SMTP config is stored in config DB and deployed to sites, not in appsettings) |
|
||||
| `ScadaLink:Notification` | `NotificationOptions` | Notification Service | (SMTP config is stored in the central config DB, not in appsettings) |
|
||||
| `ScadaLink:NotificationOutbox` | `NotificationOutboxOptions` | Notification Outbox | Dispatcher poll interval, stuck-age threshold, retention window (delivery retry settings reuse the central SMTP configuration) |
|
||||
| `ScadaLink:SiteCallAudit` | `SiteCallAuditOptions` | Site Call Audit | Reconciliation pull interval, stuck-age threshold, retention window |
|
||||
| `ScadaLink:ManagementService` | `ManagementServiceOptions` | Management Service | (Reserved for future configuration) |
|
||||
| `ScadaLink:Logging` | `LoggingOptions` | Host | Serilog sink configuration, log level overrides |
|
||||
|
||||
@@ -176,7 +178,9 @@ The Host's `Program.cs` calls these extension methods; the component libraries o
|
||||
| Communication | Yes | Yes | Yes | Yes | No |
|
||||
| HealthMonitoring | Yes | Yes | Yes | Yes | No |
|
||||
| ExternalSystemGateway | Yes | Yes | Yes | Yes | No |
|
||||
| NotificationService | Yes | Yes | Yes | Yes | No |
|
||||
| NotificationService | Yes | No | Yes | Yes | No |
|
||||
| NotificationOutbox | Yes | No | Yes | Yes | No |
|
||||
| SiteCallAudit | Yes | No | Yes | Yes | No |
|
||||
| TemplateEngine | Yes | No | Yes | Yes | No |
|
||||
| DeploymentManager | Yes | No | Yes | Yes | No |
|
||||
| Security | Yes | No | Yes | Yes | No |
|
||||
@@ -193,7 +197,7 @@ The Host's `Program.cs` calls these extension methods; the component libraries o
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **All 17 component libraries**: The Host references every component project to call their extension methods (excludes CLI, which is a separate executable).
|
||||
- **All 18 component libraries**: The Host references every component project to call their extension methods (excludes CLI, which is a separate executable).
|
||||
- **Akka.Hosting**: For `AddAkka()` and the hosting configuration builder.
|
||||
- **Akka.Remote.Hosting, Akka.Cluster.Hosting**: For Akka subsystem configuration. (No Akka.Persistence plugin — see the Persistence note under REQ-HOST-6.)
|
||||
- **Serilog.AspNetCore**: For structured logging integration.
|
||||
|
||||
@@ -123,7 +123,7 @@ The endpoint performs LDAP authentication and role resolution server-side, colla
|
||||
### Deployments
|
||||
|
||||
- **DeployInstance**: Deploy configuration to a specific instance (includes pre-deployment validation).
|
||||
- **DeployArtifacts**: Deploy system-wide artifacts (shared scripts, external system definitions, DB connections, data connections, notification lists, SMTP config) to all sites or a specific site.
|
||||
- **DeployArtifacts**: Deploy system-wide artifacts (shared scripts, external system definitions, DB connections, data connections) to all sites or a specific site.
|
||||
- **GetDeploymentStatus**: Query deployment status.
|
||||
|
||||
### External Systems
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
# Component: Notification Outbox
|
||||
|
||||
## Purpose
|
||||
|
||||
The Notification Outbox is the central component that receives store-and-forwarded notifications from site clusters, logs every one to the `Notifications` table in the central configuration database, and delivers them through per-type delivery adapters. The `Notifications` table is the single source of audit truth: every notification — successfully delivered, parked, or discarded — has exactly one durable row. The outbox provides delivery retry, parking of failures, per-notification status tracking, and KPIs for delivery health.
|
||||
|
||||
This inverts where notification delivery happens. Sites no longer send notifications directly via SMTP; a site script's notification is store-and-forwarded to central, and the central outbox owns dispatch and delivery.
|
||||
|
||||
## Location
|
||||
|
||||
Central cluster. The `NotificationOutboxActor` is a **singleton on the active central node**. It is the first outbox component to live centrally — the Store-and-Forward Engine remains site-only.
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- Own the durable central queue — the `Notifications` table in the central MS SQL database.
|
||||
- Ingest store-and-forwarded notifications from sites, insert-if-not-exists on `NotificationId`, and ack the site only after the row is persisted.
|
||||
- Run the dispatcher loop: poll due rows, resolve the target notification list, and deliver via the matching adapter.
|
||||
- Schedule retries for transient failures and park notifications on permanent failure or exhausted retries.
|
||||
- Track per-notification status across the delivery lifecycle.
|
||||
- Compute delivery KPIs from the `Notifications` table for the Health Monitoring dashboard and the Central UI.
|
||||
- Purge terminal rows daily after a configurable retention window.
|
||||
|
||||
SMTP and HTTP delivery is blocking I/O. Delivery work runs on a **dedicated blocking-I/O dispatcher**, the same pattern used by Script Execution Actors, so delivery never blocks the actor's dispatcher loop.
|
||||
|
||||
## End-to-End Flow
|
||||
|
||||
```
|
||||
Site script: Notify.To("list").Send(subject, body)
|
||||
│ generate NotificationId (GUID) locally; return it to the script immediately
|
||||
▼
|
||||
Site Store-and-Forward Engine (notification category, target = central)
|
||||
│ durably forwards to central via Central–Site Communication (ClusterClient);
|
||||
│ buffers/retries if central is unreachable
|
||||
▼
|
||||
Central ingest: insert-if-not-exists on NotificationId → Notifications table (Pending)
|
||||
│ ack the site → site S&F clears the message
|
||||
▼
|
||||
Central Notification Outbox actor (singleton, active central node)
|
||||
│ polls due rows; resolves the list; delivers via the matching adapter
|
||||
├── success → Delivered
|
||||
├── transient failure → Retrying (schedule NextAttemptAt)
|
||||
└── permanent failure
|
||||
/ retries exhausted → Parked
|
||||
```
|
||||
|
||||
The site forwards only `(listName, subject, body)` plus provenance — recipient resolution happens at central, at delivery time. This keeps notification-list definitions in one place and removes the deploy-to-sites artifact entirely.
|
||||
|
||||
`Notify.Status(notificationId)` returns a small status record — status, retry count, last error, and key timestamps (enqueued, delivered). While the notification is still in the site S&F buffer the site answers the query **locally** (status `Forwarding`); once forwarded, the query round-trips to central and reads the `Notifications` table.
|
||||
|
||||
## The `Notifications` Table
|
||||
|
||||
The table is type-agnostic so it can record any notification type the system supports — email today, Microsoft Teams and others later. One row per notification.
|
||||
|
||||
| Field | Notes |
|
||||
|---|---|
|
||||
| `NotificationId` | GUID, primary key. Generated at the **site**; used as the idempotency key. |
|
||||
| `Type` | `Email` / `Teams` / … discriminator. |
|
||||
| `ListName` | Target notification list. |
|
||||
| `Subject`, `Body` | Plain-text content. |
|
||||
| `TypeData` | JSON — extensibility hook for future per-type fields. |
|
||||
| `Status` | Lifecycle state — one of `Pending`, `Retrying`, `Delivered`, `Parked`, `Discarded`. See Status Lifecycle below. |
|
||||
| `RetryCount` | Delivery attempts so far. |
|
||||
| `LastError` | Detail of the most recent failure. |
|
||||
| `ResolvedTargets` | Who the notification actually went to — snapshotted by central at delivery time, for audit. |
|
||||
| `SourceSiteId`, `SourceInstanceId`, `SourceScript` | Provenance. |
|
||||
| `SiteEnqueuedAt` | When the script called `Send()` (carried from the site). |
|
||||
| `CreatedAt` | When central ingested the row. |
|
||||
| `LastAttemptAt`, `NextAttemptAt`, `DeliveredAt` | Delivery timestamps. |
|
||||
|
||||
All timestamps are UTC.
|
||||
|
||||
### Status Lifecycle
|
||||
|
||||
- `Forwarding` — in the site S&F buffer, not yet received by central. **Site-local only** — never stored in the central `Notifications` table; reported by `Notify.Status` while the site still holds the notification.
|
||||
- `Pending` — ingested by central, awaiting first dispatch.
|
||||
- `Retrying` — a transient failure occurred; `NextAttemptAt` schedules the next attempt.
|
||||
- `Delivered` — terminal, success.
|
||||
- `Parked` — terminal-not-delivered: a permanent failure, or retries exhausted. `LastError` distinguishes which.
|
||||
- `Discarded` — terminal, reached **only by operator action** on a parked notification. The row is kept (not deleted) so the table remains a complete audit record.
|
||||
|
||||
The Notification Outbox and the central [`Site Call Audit`](Component-SiteCallAudit.md) component share the `TrackedOperationId` tracking model and this status lifecycle, but differ in delivery locality: the Notification Outbox **delivers** notifications itself (central SMTP), whereas Site Call Audit only **audits** cached calls delivered site-locally by the site Store-and-Forward Engine — it is not a dispatcher.
|
||||
|
||||
### Retry Policy
|
||||
|
||||
Delivery retry reuses the central SMTP configuration's max-retry-count and fixed retry interval. The interval is fixed (no exponential backoff), consistent with the existing fixed-interval store-and-forward convention.
|
||||
|
||||
### Retention
|
||||
|
||||
Terminal rows (`Delivered`, `Parked`, `Discarded`) are removed by a **daily purge job** after a configurable window (default 365 days). This preserves a strong audit trail while bounding table growth. Non-terminal rows are never purged.
|
||||
|
||||
## Ingest & Idempotency
|
||||
|
||||
The site→central handoff is **at-least-once**. Central ingests an inbound notification submission with an insert-if-not-exists on `NotificationId`, then acks the site; the site S&F engine clears the message only on that ack. Because central acks only after the row is persisted (ack-after-persist), an acknowledged notification is always durably recorded. If an ack is lost, the site resends, and the GUID `NotificationId` idempotency key makes the resend harmless — the duplicate insert is a no-op.
|
||||
|
||||
A rare central failover mid-delivery could re-send a notification that had already been sent but not yet recorded as `Delivered`. This is an accepted trade-off, consistent with the duplicate-delivery trade-off the Store-and-Forward Engine already accepts.
|
||||
|
||||
## Dispatcher
|
||||
|
||||
The dispatcher loop runs on a fixed interval. On each tick the `NotificationOutboxActor`:
|
||||
|
||||
1. Polls the `Notifications` table for **due rows** — `Pending` rows, and `Retrying` rows whose `NextAttemptAt` has passed.
|
||||
2. Resolves the target notification list to its recipients/targets at central, at delivery time.
|
||||
3. Hands the notification to the delivery adapter registered for its `Type`, running on the dedicated blocking-I/O dispatcher.
|
||||
4. Applies the result:
|
||||
- **success** → `Delivered`, set `DeliveredAt`, snapshot `ResolvedTargets`.
|
||||
- **transient failure** → `Retrying`, increment `RetryCount`, set `NextAttemptAt`, record `LastError`; once retries are exhausted → `Parked`.
|
||||
- **permanent failure** → `Parked`, record `LastError`.
|
||||
|
||||
## Delivery Adapters
|
||||
|
||||
A delivery adapter implementing `INotificationDeliveryAdapter` is registered per `Type`. Each `Deliver(...)` call returns one of `success | transient failure | permanent failure`, mirroring the External System Gateway error-classification pattern.
|
||||
|
||||
- **Email adapter — implemented now.** The existing SMTP composition/send logic, relocated to the central cluster.
|
||||
- **Teams and other adapters — future.** The `Type` discriminator and the adapter interface are the seam; no Teams code exists in this design. Teams auth and targeting (Incoming Webhooks vs Graph API) is a separate design conversation.
|
||||
|
||||
Delivery adapters are provided by the Notification Service, which manages notification-list and SMTP definitions and supplies the stateless per-type "deliver one notification" implementations.
|
||||
|
||||
## Active/Standby Behavior
|
||||
|
||||
The `NotificationOutboxActor` is a singleton on the active central node. All outbox state lives in MS SQL, which is already the central HA store, so **no Akka-level replication is needed** (unlike the site S&F engine). On central failover the new active node resumes dispatch directly from the `Notifications` table — `Pending` rows and due `Retrying` rows are picked up on the next dispatcher tick.
|
||||
|
||||
## Monitoring
|
||||
|
||||
### KPIs
|
||||
|
||||
KPIs are central-computed from the `Notifications` table — global, with a per-source-site breakdown:
|
||||
|
||||
- **Queue depth** — count of `Pending` + `Retrying`.
|
||||
- **Stuck count** — `Pending` / `Retrying` rows older than the configurable stuck-age threshold.
|
||||
- **Parked count** — count of `Parked`.
|
||||
- **Delivered (last interval)** — count of `Delivered` since the previous sample.
|
||||
- **Oldest pending age** — age of the oldest non-terminal (`Pending` or `Retrying`) notification.
|
||||
|
||||
KPIs are point-in-time, computed on demand from the table. The configurable row retention (default 365 days) answers historical questions directly, so no separate time-series store is added.
|
||||
|
||||
### Stuck Detection
|
||||
|
||||
A notification is **stuck** if it is `Pending` or `Retrying` and older than a configurable age threshold (default 10 minutes). Detection is **display-only** — a count KPI and a row badge. There is no automated escalation or alerting, consistent with the system-wide no-alerting policy.
|
||||
|
||||
### Surfacing
|
||||
|
||||
- **Health Monitoring dashboard** — headline KPI tiles: queue depth, stuck count, parked count. These are central-computed and are not part of the site health report. The site S&F notification backlog remains a separate site health metric covering the site→central leg.
|
||||
- **Central UI "Notification Outbox" page** — KPI tiles plus a queryable notification list: filter by status, type, source site, list, and time range; a stuck-only toggle; keyword search on subject. Parked notifications offer **Retry** (→ `Pending`, reset `RetryCount` / `NextAttemptAt`) and **Discard** (→ `Discarded`) actions. Stuck rows are badged.
|
||||
|
||||
## Configuration
|
||||
|
||||
The component is configured via `NotificationOutboxOptions`, bound from an `appsettings.json` section on the central host (Options pattern):
|
||||
|
||||
- **Dispatch interval** — how often the dispatcher loop polls for due rows.
|
||||
- **Stuck-age threshold** — age beyond which a non-terminal notification is counted as stuck (default 10 minutes).
|
||||
- **Terminal-row retention window** — age after which terminal rows are removed by the daily purge job (default 365 days).
|
||||
|
||||
Delivery max-retry-count and retry interval are not part of `NotificationOutboxOptions` — they are reused from the central SMTP configuration.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **Notification Service**: Provides notification-list and SMTP definitions, and the per-type delivery adapters the outbox invokes.
|
||||
- **Configuration Database**: Hosts the `Notifications` table; provides the entity POCO, repository, and EF migration for outbox persistence.
|
||||
- **Central–Site Communication**: Carries inbound notification submissions and acks between sites and central.
|
||||
- **Health Monitoring**: Consumes the outbox KPIs as central-computed headline metrics.
|
||||
- **Central UI**: Hosts the Notification Outbox page.
|
||||
|
||||
## Interactions
|
||||
|
||||
- **Site Store-and-Forward Engine**: Forwards notifications to central via Central–Site Communication; the outbox ingests them and acks once persisted.
|
||||
- **Notification Service**: Supplies delivery adapters and resolves notification lists at delivery time.
|
||||
- **Central UI**: Queries the `Notifications` table for the Notification Outbox page and issues operator Retry/Discard actions on parked notifications.
|
||||
- **Health Monitoring**: Polls the outbox for KPI tiles on the health dashboard.
|
||||
@@ -2,84 +2,96 @@
|
||||
|
||||
## Purpose
|
||||
|
||||
The Notification Service provides email notification capabilities to scripts running at site clusters. It manages notification lists, handles email delivery, and integrates with the Store-and-Forward Engine for reliable delivery when the email server is unavailable.
|
||||
The Notification Service is the central component that manages notification-list and SMTP definitions and supplies the stateless, per-type "deliver one notification" adapter implementations that the Notification Outbox invokes at delivery time.
|
||||
|
||||
The Notification Service no longer delivers notifications from sites. Notification delivery has been inverted: a site script's notification is store-and-forwarded to the central cluster, and the central **Notification Outbox** owns dispatch and delivery, calling an `INotificationDeliveryAdapter` supplied by this component. See [`Component-NotificationOutbox.md`](Component-NotificationOutbox.md).
|
||||
|
||||
## Location
|
||||
|
||||
Central cluster (definition management, stores in config DB). Site clusters (email delivery, reads definitions from local SQLite).
|
||||
Central cluster only. The Notification Service manages definitions in the central configuration database and provides the delivery adapters that run on the central cluster. It is no longer present at site clusters, and notification definitions and SMTP configuration are no longer deployed to sites.
|
||||
|
||||
## Responsibilities
|
||||
|
||||
### Definitions (Central)
|
||||
- Store notification lists in the configuration database: list name, recipients (name + email address).
|
||||
- Store notification lists in the configuration database: list name, list **type**, and type-specific targets (e.g. recipients for an `Email` list).
|
||||
- Store email server configuration (SMTP settings).
|
||||
- Deploy notification lists and SMTP configuration uniformly to all sites. Deployment requires **explicit action** by a user with the Deployment role.
|
||||
- Managed by users with the Design role.
|
||||
- Notification lists and SMTP configuration are **not deployed to sites** — they exist centrally only. There is no deploy-to-sites artifact and no local SQLite copy.
|
||||
|
||||
### Delivery (Site)
|
||||
- Resolve notification list names to recipient lists from **local SQLite** (populated by artifact deployment). Sites do not access the central config DB.
|
||||
- Compose and send emails via SMTP using locally stored SMTP configuration.
|
||||
- On delivery failure, submit the notification to the Store-and-Forward Engine for buffered retry.
|
||||
### Delivery Adapters (Central)
|
||||
- Provide a delivery adapter implementing `INotificationDeliveryAdapter` for each notification `Type`.
|
||||
- Each adapter is a stateless "deliver one notification" implementation: it composes and sends a single notification and classifies the outcome.
|
||||
- The **Email adapter** is the relocated SMTP composition and send logic — formerly run at sites, it now runs on the central cluster.
|
||||
- Resolve a notification list name to its concrete targets (e.g. recipient addresses) at delivery time, on behalf of the Notification Outbox.
|
||||
|
||||
## Notification List Definition
|
||||
|
||||
Each notification list includes:
|
||||
- **Name**: Unique identifier (e.g., "Maintenance-Team", "Shift-Supervisors").
|
||||
- **Recipients**: One or more entries, each with:
|
||||
- **Type**: The notification type — `Email` (implemented now); `Teams` and other types are planned for the future. `Notify.To("list")` works transparently for any type — the calling script does not care about the type.
|
||||
- **Type-specific targets**: The targets appropriate to the list type. For an `Email` list, one or more recipient entries, each with:
|
||||
- Recipient name.
|
||||
- Email address.
|
||||
|
||||
Lists are defined and stored centrally only. **Recipient resolution happens at central, at delivery time** — a site forwards only `(listName, subject, body)` plus provenance; the Notification Outbox asks the Notification Service to resolve the list when it dispatches the notification.
|
||||
|
||||
## Email Server Configuration
|
||||
|
||||
The SMTP configuration is defined centrally and deployed to all sites. It includes:
|
||||
The SMTP configuration is defined centrally and used by the central Email delivery adapter. It is not deployed to sites. It includes:
|
||||
|
||||
- **Server hostname**: SMTP server address (e.g., `smtp.office365.com`).
|
||||
- **Port**: SMTP port (e.g., 587 for StartTLS, 465 for SSL).
|
||||
- **Authentication mode**: One of:
|
||||
- **Basic Auth**: Username and password. For on-prem SMTP relays or servers that support basic authentication.
|
||||
- **OAuth2 Client Credentials**: Tenant ID, Client ID, and Client Secret. For Microsoft 365 and other modern SMTP providers that require OAuth2. The Notification Service handles the token lifecycle internally (fetch, cache, refresh on expiry).
|
||||
- **OAuth2 Client Credentials**: Tenant ID, Client ID, and Client Secret. For Microsoft 365 and other modern SMTP providers that require OAuth2. The Email adapter handles the token lifecycle internally (fetch, cache, refresh on expiry).
|
||||
- **TLS mode**: None, StartTLS, or SSL.
|
||||
- **From address**: The sender email address for all notifications (e.g., `scada-notifications@company.com`).
|
||||
- **Connection timeout**: Maximum time to wait for SMTP connection (default: 30 seconds).
|
||||
- **Max concurrent connections**: Maximum simultaneous SMTP connections per site (default: 5).
|
||||
- **Retry settings**: Max retry count, fixed time between retries (used by Store-and-Forward Engine for transient failures).
|
||||
- **Max concurrent connections**: Maximum simultaneous SMTP connections from the central cluster (default: 5).
|
||||
- **Retry settings**: Max retry count, fixed time between retries. The Notification Outbox reuses these for delivery retry of transient failures.
|
||||
|
||||
## Script API
|
||||
|
||||
```csharp
|
||||
Notify.To("listName").Send("subject", "message")
|
||||
NotificationId id = Notify.To("listName").Send("subject", "message");
|
||||
NotificationStatus status = Notify.Status(id);
|
||||
```
|
||||
|
||||
- Available to instance scripts (via Script Execution Actors), alarm on-trigger scripts (via Alarm Execution Actors), and shared scripts (executing inline).
|
||||
- Resolves the list name to recipients, composes the email, and attempts delivery.
|
||||
- `Notify.To("listName").Send(...)` is **asynchronous**: it generates a `NotificationId` (GUID) locally, hands the notification to the site Store-and-Forward Engine for forwarding to central, and returns the `NotificationId` to the script **immediately**. The script does not block waiting for delivery.
|
||||
- The message body is **plain text** only. No HTML content.
|
||||
- `Notify.Status(notificationId)` returns a small **status record** — the current status, retry count, last error, and key timestamps (enqueued, delivered). While the notification is still in the site Store-and-Forward buffer, the site answers the query **locally** with status `Forwarding`; once forwarded to central, the query round-trips to central and reads the `Notifications` table.
|
||||
- The returned `NotificationId` is a `TrackedOperationId` — the shared Commons tracking-handle type used by all store-and-forward producers; `NotificationId` is simply the notification-domain name for it. Likewise, `Notify.Status` is a thin alias of the unified `Tracking.Status` accessor, retained for backward compatibility. This is a naming/type clarification only — notification delivery behavior is unchanged.
|
||||
|
||||
## Email Delivery Behavior
|
||||
## Notification Delivery Behavior
|
||||
|
||||
### Recipient Handling
|
||||
- A single email is sent per `Notify.To().Send()` call, with all list recipients in **BCC**. The from address is placed in the To field.
|
||||
Delivery is performed centrally by the Notification Outbox, which calls the `INotificationDeliveryAdapter` registered for the notification's `Type`. The behavior below describes the Email adapter.
|
||||
|
||||
### Recipient Handling (Email)
|
||||
- A single email is sent per notification, with all list recipients in **BCC**. The from address is placed in the To field.
|
||||
- Recipients do not see each other's email addresses.
|
||||
- No per-recipient deduplication — if the same email address appears in multiple lists and a script sends to each of those lists, that recipient receives multiple emails.
|
||||
|
||||
### Error Classification
|
||||
Consistent with the External System Gateway pattern:
|
||||
- **Transient failures** (connection refused, timeout, SMTP 4xx temporary errors): The notification is handed to the **Store-and-Forward Engine** for buffered retry per the SMTP configuration's retry settings. The script does **not** block waiting for eventual delivery.
|
||||
- **Permanent failures** (SMTP 5xx permanent errors, e.g., mailbox not found): The error is returned **synchronously** to the calling script for handling. No retry — the notification will never deliver.
|
||||
- This prevents the S&F buffer from accumulating notifications that will never succeed.
|
||||
Each `Deliver(...)` call returns one of `success | transient failure | permanent failure`, consistent with the External System Gateway pattern. There is **no synchronous permanent-failure return to the script** — `Send()` returns immediately, before any delivery is attempted.
|
||||
|
||||
- **Transient failures** (connection refused, timeout, SMTP 4xx temporary errors): The Notification Outbox moves the row to `Retrying` and schedules another attempt per the SMTP configuration's retry settings.
|
||||
- **Permanent failures** (SMTP 5xx permanent errors, e.g., mailbox not found): The Notification Outbox moves the row to `Parked` with the error in `LastError`. The notification will never deliver, and an operator can review or discard it on the Central UI Notification Outbox page.
|
||||
- Retries exhausted on a transient failure also result in a `Parked` row.
|
||||
- A script observes failures only by calling `Notify.Status(id)` and seeing a `Parked` status — not as a synchronous exception.
|
||||
|
||||
### No Rate Limiting
|
||||
- No application-level rate limiting. If the SMTP server enforces sending limits (e.g., Microsoft 365 throttling), those manifest as transient failures and are handled naturally by store-and-forward.
|
||||
- No application-level rate limiting. If the SMTP server enforces sending limits (e.g., Microsoft 365 throttling), those manifest as transient failures and are retried naturally by the Notification Outbox.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **Configuration Database (MS SQL)**: Stores notification list definitions and SMTP config (central only).
|
||||
- **Local SQLite**: At sites, notification lists, recipients, and SMTP configuration are read from local SQLite (populated by artifact deployment). Sites do not access the central config DB.
|
||||
- **Store-and-Forward Engine**: Handles buffering for failed email deliveries.
|
||||
- **Configuration Database (MS SQL)**: Stores notification list definitions (name, type, type-specific targets) and SMTP config.
|
||||
- **Notification Outbox**: Invokes the delivery adapters supplied by this component and asks it to resolve notification lists at delivery time.
|
||||
- **Security & Auth**: Design role manages notification lists.
|
||||
- **Configuration Database (via IAuditService)**: Notification list changes are audit logged.
|
||||
|
||||
## Interactions
|
||||
|
||||
- **Site Runtime (Script/Alarm Execution Actors)**: Scripts invoke `Notify.To().Send()` through this component.
|
||||
- **Store-and-Forward Engine**: Failed notifications are buffered here.
|
||||
- **Deployment Manager**: Receives updated notification lists and SMTP config as part of system-wide artifact deployment (triggered explicitly by Deployment role).
|
||||
- **Notification Outbox**: Consumes the per-type delivery adapters and the list-resolution service this component provides; the outbox owns dispatch, retry, parking, and status.
|
||||
- **Site Runtime (Script/Alarm Execution Actors)**: Scripts invoke `Notify.To().Send()` and `Notify.Status()`. `Send()` generates a `NotificationId` and hands the notification to the site Store-and-Forward Engine; it does not contact this component synchronously.
|
||||
- **Store-and-Forward Engine (site)**: Forwards a script's notification to central; the central Notification Outbox ingests it for delivery. The Notification Service does not interact with the site Store-and-Forward Engine directly.
|
||||
|
||||
@@ -76,10 +76,11 @@ Central cluster. Sites do not have user-facing interfaces and do not perform ind
|
||||
- Create and manage instances (overrides, connection bindings, area assignment).
|
||||
- Disable, enable, and delete instances.
|
||||
- Deploy configurations to instances.
|
||||
- Deploy system-wide artifacts (shared scripts, external system definitions, DB connections, notification lists) to all sites.
|
||||
- Deploy system-wide artifacts (shared scripts, external system definitions, DB connections, data connections) to all sites.
|
||||
- View deployment diffs and status.
|
||||
- Use debug view.
|
||||
- Manage parked messages.
|
||||
- Monitor and manage the Notification Outbox (retry and discard parked notifications).
|
||||
- View site event logs.
|
||||
- **Site scoping**: A user with site-scoped Deployment role can only perform these actions for instances at their permitted sites.
|
||||
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
# Component: Site Call Audit
|
||||
|
||||
## Purpose
|
||||
|
||||
Provides central, queryable audit and operational visibility for cached calls
|
||||
made by site scripts — `ExternalSystem.CachedCall()` and `Database.CachedWrite()`.
|
||||
Each such call carries a `TrackedOperationId`; sites report lifecycle telemetry
|
||||
to this component, which maintains a central audit record, computes KPIs, and
|
||||
relays Retry/Discard actions back to the owning site.
|
||||
|
||||
This is the second centrally-hosted observability component for site
|
||||
store-and-forward activity (the Notification Outbox is the first). Unlike the
|
||||
Notification Outbox, Site Call Audit is **not a dispatcher** — it never delivers
|
||||
anything. Cached calls are delivered by the site's Store-and-Forward Engine
|
||||
against site-local external systems and databases, which central cannot reach.
|
||||
|
||||
## Location
|
||||
|
||||
Central cluster only. A singleton actor (`SiteCallAuditActor`) on the active
|
||||
central node. Registered as component #22 in the Host role configuration.
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- Ingest cached-call lifecycle telemetry from sites into the central `SiteCalls`
|
||||
table.
|
||||
- Run periodic per-site reconciliation pulls so missed telemetry self-heals.
|
||||
- Compute point-in-time KPIs (global and per-site) from the `SiteCalls` table.
|
||||
- Relay operator Retry/Discard actions for parked cached calls to the owning
|
||||
site over the command/control channel.
|
||||
- Purge terminal audit rows after a configurable retention window.
|
||||
|
||||
## The `SiteCalls` Table
|
||||
|
||||
Lives in the central MS SQL configuration database — a sibling of the
|
||||
`Notifications` table. One row per `TrackedOperationId`:
|
||||
|
||||
- **TrackedOperationId** — GUID, primary key. Generated site-side at call time.
|
||||
- **SourceSite** — site that issued the call.
|
||||
- **Kind** — `TrackedOperationKind` enum: `ExternalCall` or `DatabaseWrite`.
|
||||
- **TargetSummary** — external system + method name for an `ExternalCall`; for a
|
||||
`DatabaseWrite`, just the database connection name — intentionally not the SQL
|
||||
statement or table, a deliberate scoping choice.
|
||||
- **Status** — `Pending`, `Retrying`, `Delivered`, `Parked`, `Failed`, `Discarded`.
|
||||
- **RetryCount** — attempts so far.
|
||||
- **LastError** — most recent error detail, if any.
|
||||
- **Provenance** — source instance / script.
|
||||
- **CreatedAtUtc**, **UpdatedAtUtc**, **TerminalAtUtc** — key timestamps.
|
||||
|
||||
## Status Lifecycle
|
||||
|
||||
`Pending → Retrying → Delivered / Parked / Failed / Discarded`
|
||||
|
||||
- **Pending** — non-terminal: buffered after a transient failure, awaiting its
|
||||
first retry.
|
||||
- **Retrying** — non-terminal: undergoing retry attempts.
|
||||
- **Delivered** — terminal, success. A cached call that succeeds on its first
|
||||
immediate attempt is recorded directly as `Delivered`.
|
||||
- **Parked** — non-terminal: transient retries exhausted; awaiting manual action.
|
||||
- **Failed** — terminal: permanent failure (e.g. HTTP 4xx). The error was also
|
||||
returned synchronously to the calling script; the record captures it. `Failed`
|
||||
rows are **not operator-actionable** — see Retry / Discard Relay.
|
||||
- **Discarded** — terminal, reached **only by operator action** on a `Parked`
|
||||
row. The row is kept (not deleted) so the table remains a complete audit
|
||||
record.
|
||||
|
||||
The site is the source of truth. The `SiteCalls` row is an eventually-consistent
|
||||
mirror — never queried by scripts (`Tracking.Status()` is answered site-locally).
|
||||
|
||||
## Ingest & Idempotency
|
||||
|
||||
Telemetry ingestion is **insert-if-not-exists** keyed on `TrackedOperationId`,
|
||||
then **upsert-on-newer-status**. The lifecycle is monotonic, so status only
|
||||
advances and never regresses; at-least-once and out-of-order telemetry are
|
||||
therefore harmless.
|
||||
|
||||
## Reconciliation
|
||||
|
||||
Because telemetry is best-effort, `SiteCallAuditActor` periodically — and on site
|
||||
reconnect — pulls "all tracking rows changed since cursor X" from each site.
|
||||
Gaps left by lost telemetry self-heal. Central converges to the site; the site
|
||||
never depends on central.
|
||||
|
||||
## Retry / Discard Relay
|
||||
|
||||
Parked cached calls live in the owning site's S&F buffer. Operator Retry/Discard
|
||||
from the Central UI is relayed to that site as a `RetryParkedOperation` /
|
||||
`DiscardParkedOperation` command over the command/control channel. The site
|
||||
applies the change and emits telemetry reflecting the new state; central never
|
||||
mutates the `SiteCalls` row directly. If the site is offline the command fails
|
||||
fast and the UI surfaces a "site unreachable" message.
|
||||
|
||||
Only `Parked` rows are operator-actionable. `Failed` rows offer no Retry or
|
||||
Discard: a permanent failure (e.g. HTTP 4xx) would simply fail again, and the
|
||||
error was already returned synchronously to the calling script — there is
|
||||
nothing for an operator to recover.
|
||||
|
||||
## KPIs
|
||||
|
||||
Point-in-time, computed from the `SiteCalls` table, global and per-source-site,
|
||||
mirroring the Notification Outbox KPI shape:
|
||||
|
||||
- Buffered count (`Pending` + `Retrying`)
|
||||
- Parked count
|
||||
- Failed-last-interval
|
||||
- Delivered-last-interval
|
||||
- Oldest-pending age
|
||||
- Stuck count — `Pending`/`Retrying` older than a configurable threshold
|
||||
(default 10 minutes); display-only, no escalation.
|
||||
|
||||
## Retention
|
||||
|
||||
Daily purge of terminal rows (`Delivered`, `Failed`, `Discarded`) after a
|
||||
configurable window (default 365 days), matching the `Notifications` purge.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **Configuration Database**: hosts the `SiteCalls` table and its repository.
|
||||
- **Central–Site Communication**: receives cached-call telemetry and reconciliation
|
||||
responses; sends Retry/Discard commands.
|
||||
- **Store-and-Forward Engine**: the site-side origin of cached-call telemetry and
|
||||
the executor of relayed Retry/Discard commands.
|
||||
- **Commons**: `TrackedOperationId`, status enum, telemetry message contracts.
|
||||
|
||||
## Interactions
|
||||
|
||||
- **Central UI**: the Site Calls page queries this component and issues
|
||||
Retry/Discard actions.
|
||||
- **Health Monitoring**: surfaces Site Call Audit KPI tiles on the dashboard.
|
||||
- **Cluster Infrastructure**: hosts the `SiteCallAuditActor` singleton with
|
||||
active/standby failover.
|
||||
@@ -25,12 +25,13 @@ Site clusters (event recording and storage). Central cluster (remote query acces
|
||||
| Data Connection Status | Connected, disconnected, reconnected (per connection) |
|
||||
| Store-and-Forward | Message queued, delivered, retried, parked |
|
||||
| Instance Lifecycle | Instance enabled, disabled, deleted |
|
||||
| Notification | Site→central forward failure, long-buffered notification (still in the site buffer past a threshold) |
|
||||
|
||||
## Event Entry Schema
|
||||
|
||||
Each event entry contains:
|
||||
- **Timestamp**: When the event occurred.
|
||||
- **Event Type**: Category of the event (script, alarm, deployment, connection, store-and-forward, instance-lifecycle).
|
||||
- **Event Type**: Category of the event (script, alarm, deployment, connection, store-and-forward, instance-lifecycle, notification).
|
||||
- **Severity**: Info, Warning, or Error.
|
||||
- **Instance ID** *(optional)*: The instance associated with the event (if applicable).
|
||||
- **Source**: The subsystem that generated the event (e.g., "ScriptActor:MonitorSpeed", "AlarmActor:OverTemp", "DataConnection:PLC1").
|
||||
@@ -62,11 +63,12 @@ Each event entry contains:
|
||||
- **Communication Layer**: Handles remote query requests from central.
|
||||
- **Site Runtime**: Generates script execution events, alarm events, deployment application events, and instance lifecycle events.
|
||||
- **Data Connection Layer**: Generates connection status events.
|
||||
- **Store-and-Forward Engine**: Generates buffer activity events.
|
||||
- **Store-and-Forward Engine**: Generates buffer activity events, including notification-category forward failures and long-buffered notifications on the site→central notification path.
|
||||
|
||||
## Interactions
|
||||
|
||||
- **All site subsystems**: Event logging is a cross-cutting concern — any subsystem that produces notable events calls the Event Logging service.
|
||||
- **Communication Layer**: Receives remote queries from central and returns results.
|
||||
- **Central UI**: Site Event Log Viewer displays queried events.
|
||||
- **Store-and-Forward Engine**: Its notification path (the site→central forward of script-generated notifications) reports forward failures and long-buffered notifications as Notification-category events. Routine enqueue and forward-success events are deliberately not logged — central's `Notifications` table (owned by the Notification Outbox component) is the authoritative audit record; site-side logging covers only the in-transit blind spot when central is unreachable.
|
||||
- **Health Monitoring**: Script error rates and alarm evaluation error rates can be derived from event log data.
|
||||
|
||||
@@ -66,7 +66,7 @@ Deployment Manager Singleton (Cluster Singleton)
|
||||
- Reports deployment result (success/failure) back to central.
|
||||
|
||||
### System-Wide Artifact Handling
|
||||
- Receives updated shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, and SMTP configuration from central.
|
||||
- Receives updated shared scripts, external system definitions, database connection definitions, and data connection definitions from central. (Notification lists and SMTP configuration are central-only and are not deployed to sites — see Component-NotificationService.md.)
|
||||
- Stores all artifacts in local SQLite. After artifact deployment, the site is fully self-contained — all runtime configuration is read from local SQLite with no access to the central configuration database.
|
||||
- Recompiles shared scripts and makes updated code available to all Script Actors.
|
||||
|
||||
@@ -254,14 +254,19 @@ Available to all Script Execution Actors and Alarm Execution Actors:
|
||||
|
||||
### External Systems
|
||||
- `ExternalSystem.Call("systemName", "methodName", params)` — Synchronous HTTP call. Blocks until response or timeout. All failures return to script. Use when the script needs the result.
|
||||
- `ExternalSystem.CachedCall("systemName", "methodName", params)` — Fire-and-forget with store-and-forward on transient failure. Use for outbound data pushes where deferred delivery is acceptable.
|
||||
- `ExternalSystem.CachedCall("systemName", "methodName", params)` — Deferred delivery. Returns a `TrackedOperationId` tracking handle immediately rather than the response; the call is attempted immediately and, on transient failure, store-and-forwarded for retry. Use for outbound data pushes where deferred delivery is acceptable.
|
||||
- The returned `TrackedOperationId` can be passed to `Tracking.Status(id)` (see **Operation Tracking** below) to observe delivery progress.
|
||||
|
||||
### Notifications
|
||||
- `Notify.To("listName").Send("subject", "message")` — Send an email notification via a named notification list.
|
||||
- `Notify.To("listName").Send("subject", "message")` — Send a notification via a named notification list. Generates a `TrackedOperationId` (GUID) locally and returns it immediately; the notification is store-and-forwarded to the central cluster, which owns delivery. The script never contacts SMTP. (`NotificationId` is the notification-domain name for this same `TrackedOperationId` type.)
|
||||
- `Notify.Status("trackedOperationId")` — A thin alias of `Tracking.Status(id)` retained for the notification domain. Returns a status record (status, retry count, last error, key timestamps). While the notification is still in the site store-and-forward buffer the site answers locally (status `Forwarding`); once forwarded the query round-trips to central.
|
||||
|
||||
### Database Access
|
||||
- `Database.Connection("connectionName")` — Obtain a raw MS SQL client connection (ADO.NET) for synchronous read/write.
|
||||
- `Database.CachedWrite("connectionName", "sql", parameters)` — Submit a write operation for store-and-forward delivery.
|
||||
- `Database.CachedWrite("connectionName", "sql", parameters)` — Submit a write operation for store-and-forward delivery. Returns a `TrackedOperationId` tracking handle immediately; pass it to `Tracking.Status(id)` to observe delivery progress.
|
||||
|
||||
### Operation Tracking
|
||||
- `Tracking.Status("trackedOperationId")` — Returns a status record (status, retry count, last error, key timestamps) for any tracked operation: a cached external system call, a cached database write, or a notification. For cached calls and writes the answer is always site-local and authoritative — the site owns the operation tracking table. (`Notify.Status(...)` is a thin alias scoped to the notification domain.)
|
||||
|
||||
### Parameter Access
|
||||
- `Parameters["key"]` — Raw dictionary access (returns `object?`, requires manual casting).
|
||||
@@ -282,7 +287,7 @@ Available to all Script Execution Actors and Alarm Execution Actors:
|
||||
|
||||
Scripts execute **in-process** with constrained access. The following restrictions are enforced at compilation and runtime:
|
||||
|
||||
- **Allowed**: Access to the Script Runtime API (GetAttribute, SetAttribute, CallScript, CallShared, ExternalSystem, Notify, Database), standard C# language features, basic .NET types (collections, string manipulation, math, date/time).
|
||||
- **Allowed**: Access to the Script Runtime API (GetAttribute, SetAttribute, CallScript, CallShared, ExternalSystem, Notify, Database, Tracking), standard C# language features, basic .NET types (collections, string manipulation, math, date/time).
|
||||
- **Forbidden**: File system access (`System.IO`), process spawning (`System.Diagnostics.Process`), threading (`System.Threading` — except async/await), reflection (`System.Reflection`), raw network access (`System.Net.Sockets`, `System.Net.Http` — must use `ExternalSystem.Call`), assembly loading, unsafe code.
|
||||
- **Execution timeout**: Configurable per-script maximum execution time. Exceeding the timeout cancels the script and logs an error.
|
||||
- **Memory**: Scripts share the host process memory. No per-script memory limit, but the execution timeout prevents runaway allocations.
|
||||
@@ -353,13 +358,12 @@ Per Akka.NET best practices, internal actor communication uses **Tell** (fire-an
|
||||
## Dependencies
|
||||
|
||||
- **Data Connection Layer**: Provides tag value updates to Instance Actors. Receives write requests from Instance Actors.
|
||||
- **Store-and-Forward Engine**: Handles reliable delivery for external system calls, notifications, and cached database writes submitted by scripts.
|
||||
- **Store-and-Forward Engine**: Handles reliable delivery for external system calls, cached database writes, and notifications submitted by scripts. For the notification category specifically, it forwards to the central cluster for delivery (not directly to SMTP). Owns the site-local operation tracking table that backs `Tracking.Status(id)`.
|
||||
- **External System Gateway**: Provides external system method invocations for scripts.
|
||||
- **Notification Service**: Handles email delivery for scripts.
|
||||
- **Communication Layer**: Receives deployments and lifecycle commands from central. Handles debug view requests. Reports deployment results.
|
||||
- **Site Event Logging**: Records script executions, alarm events, deployment events, instance lifecycle events.
|
||||
- **Health Monitoring**: Reports script error rates and alarm evaluation error rates.
|
||||
- **Local SQLite**: Persists deployed configurations, system-wide artifacts (external system definitions, database connection definitions, data connection definitions, notification lists, SMTP configuration).
|
||||
- **Local SQLite**: Persists deployed configurations, system-wide artifacts (external system definitions, database connection definitions, data connection definitions).
|
||||
|
||||
## Interactions
|
||||
|
||||
|
||||
@@ -13,14 +13,16 @@ Site clusters only. The central cluster does not buffer messages.
|
||||
- Buffer outbound messages when the target system is unavailable.
|
||||
- Manage three categories of buffered messages:
|
||||
- External system API calls.
|
||||
- Email notifications.
|
||||
- Notifications forwarded to the central cluster.
|
||||
- Cached database writes.
|
||||
- Retry delivery per message according to the configured retry policy.
|
||||
- Park messages that exhaust their retry limit (dead-letter).
|
||||
- Persist buffered messages to local SQLite for durability.
|
||||
- Maintain a site-local **operation tracking table** holding one row per `TrackedOperationId` for cached calls (`ExternalCall` and `DatabaseWrite`) — the authoritative status record consulted by `Tracking.Status(id)`.
|
||||
- Emit cached-call lifecycle telemetry to the central Site Call Audit component on every status transition.
|
||||
- Replicate buffered messages to the standby node via application-level replication over Akka.NET remoting.
|
||||
- On failover, the standby node takes over delivery from its replicated copy.
|
||||
- Respond to remote queries from central for parked message management (list, retry, discard).
|
||||
- Respond to remote queries from central for parked message management (list, retry, discard), including central-driven Retry/Discard of parked cached calls.
|
||||
|
||||
## Message Lifecycle
|
||||
|
||||
@@ -42,16 +44,23 @@ Attempt immediate delivery
|
||||
└── Max retries exhausted → Park message
|
||||
```
|
||||
|
||||
For notifications, "delivery" means forwarding the message to the central cluster via Central–Site Communication; "success" is central's ack, on which the message is cleared. Notifications do not park — they are retried at the fixed forward interval until central acks. Parking applies only to the external-system-call and cached-database-write categories.
|
||||
|
||||
For the cached-call categories (`ExternalCall` and `DatabaseWrite`), the operation tracking table is the status record and the S&F buffer is purely the retry mechanism. A cached call that succeeds on its first immediate attempt is written directly as a terminal `Delivered` tracking row and never enters the S&F buffer. When immediate delivery fails transiently, the message is buffered and its tracking row moves to `Pending`/`Retrying`; the buffered message carries its `TrackedOperationId` so the tracking row and the retry record stay linked. When immediate delivery fails **permanently** (e.g. HTTP 4xx), the message is not buffered — the error is returned synchronously to the calling script as before — but the tracking row is written directly as a terminal `Failed` row capturing the error. On every tracking-table status transition the site emits `CachedCallTelemetry` to central.
|
||||
|
||||
Every cached-call outcome maps to a tracking-table state: immediate success → `Delivered`; transient failure → `Pending`/`Retrying`, eventually `Delivered` or `Parked`; permanent failure → terminal `Failed`; operator discard of a parked row → terminal `Discarded`.
|
||||
|
||||
## Retry Policy
|
||||
|
||||
Retry settings are defined on the **source entity** (not per-message):
|
||||
For the external-system-call and cached-database-write categories, retry settings are defined on the **source entity** (not per-message):
|
||||
- **External systems**: Each external system definition includes max retry count and time between retries.
|
||||
- **Notifications**: Email/SMTP configuration includes max retry count and time between retries.
|
||||
- **Cached database writes**: Each database connection definition includes max retry count and time between retries.
|
||||
|
||||
The **notification** category retries differently: it has no source-entity setting. The site→central forward uses a single fixed retry interval configured in the host `appsettings.json`. This interval is infrastructure config for reaching the central cluster, not a per-notification-list setting. It applies uniformly to every buffered notification regardless of its target list. A buffered notification is retried until central acks it; it is not parked on a retry limit (central, once reachable, owns delivery, retry, and parking from that point on).
|
||||
|
||||
The retry interval is **fixed** (not exponential backoff). Fixed interval is sufficient for the expected use cases.
|
||||
|
||||
**Note**: Only **transient failures** are eligible for store-and-forward buffering. For external system calls, transient failures are connection errors, timeouts, and HTTP 5xx responses. Permanent failures (HTTP 4xx) are returned directly to the calling script and are **not** queued for retry. This prevents the buffer from accumulating requests that will never succeed.
|
||||
**Note**: Only **transient failures** are eligible for store-and-forward buffering. For external system calls, transient failures are connection errors, timeouts, and HTTP 5xx responses. Permanent failures (HTTP 4xx) are returned directly to the calling script and are **not** queued for retry. This prevents the buffer from accumulating requests that will never succeed. For the cached-call categories, a permanent failure additionally sets the operation's tracking-table row to terminal `Failed`, capturing the error — so even a never-buffered cached call has an authoritative status record. `Failed` rows are not operator-actionable: a permanent failure would only fail again, and the error was already returned to the script.
|
||||
|
||||
## Buffer Size
|
||||
|
||||
@@ -65,6 +74,22 @@ There is **no maximum buffer size**. Messages accumulate in the buffer until del
|
||||
- On failover, the new active node has a near-complete copy of the buffer. In rare cases, the most recent operations may not have been replicated (e.g., a message added or removed just before failover). This can result in a few **duplicate deliveries** (message delivered but remove not replicated) or a few **missed retries** (message added but not replicated). Both are acceptable trade-offs for the latency benefit.
|
||||
- On failover, the new active node resumes delivery from its local copy.
|
||||
|
||||
### Operation Tracking Table
|
||||
|
||||
Alongside the S&F buffer DB, each site node holds a **site-local operation tracking table** in SQLite. It carries one row per `TrackedOperationId` for cached calls (`ExternalCall` and `DatabaseWrite`), created the moment the script issues the cached call and kept regardless of outcome.
|
||||
|
||||
- This table is the **status record**; the S&F buffer remains purely the **retry mechanism**. A buffered cached-call message carries its `TrackedOperationId`, which links it back to its tracking row.
|
||||
- Each row records the operation kind (`TrackedOperationKind`), a target summary (external system + method, or database connection name), the unified `TrackedOperationStatus`, retry count, last error, source provenance (instance / script), and the created/updated/terminal UTC timestamps.
|
||||
- `Tracking.Status(id)` reads this table. For cached calls the **site is the authoritative source of truth** for status — the query is always answered site-locally, even when central is unreachable. The central Site Call Audit `SiteCalls` table is an eventually-consistent mirror.
|
||||
- A cached call that succeeds on its first immediate attempt writes a terminal `Delivered` row directly here, with nothing placed in the S&F buffer.
|
||||
- Terminal rows are purged after a configurable retention window (default 7 days) — the site holds live operational state; central holds long-term audit.
|
||||
|
||||
Notifications are unaffected: they have no tracking table. Their `NotificationId` and status are owned by the central `Notifications` table, and their lifecycle continues to forward to central exactly as before.
|
||||
|
||||
### Telemetry to Central
|
||||
|
||||
On every tracking-table status transition, the site emits a `CachedCallTelemetry` message to the central Site Call Audit component over the site→central channel. Emission is best-effort, at-least-once, and idempotent on `TrackedOperationId`. Because telemetry is best-effort, the site also responds to `CachedCallReconcileRequest` reconciliation pulls — per-site reads of the tracking rows that have changed since a given cursor — so any missed telemetry self-heals. The site never depends on central; central converges to the site.
|
||||
|
||||
## Parked Message Management
|
||||
|
||||
- Parked messages remain stored at the site in SQLite.
|
||||
@@ -72,31 +97,36 @@ There is **no maximum buffer size**. Messages accumulate in the buffer until del
|
||||
- Operators can:
|
||||
- **Retry** a parked message (moves it back to the retry queue).
|
||||
- **Discard** a parked message (removes it permanently).
|
||||
- Store-and-forward messages are **not** automatically cleared when an instance is deleted. Pending and parked messages continue to exist and can be managed via the central UI.
|
||||
- For parked cached calls, Retry/Discard can be driven centrally: the Site Call Audit component relays `RetryParkedOperation` / `DiscardParkedOperation` commands (keyed by `TrackedOperationId`) down to the owning site. The site applies the command to its S&F buffer and tracking table, then emits `CachedCallTelemetry` reflecting the new state (`Retrying` or `Discarded`) — central never mutates its mirror row directly.
|
||||
- Store-and-forward messages are **not** automatically cleared when an instance is deleted. Pending and parked messages, and their tracking rows, continue to exist and can be managed via the central UI.
|
||||
|
||||
## Message Format
|
||||
|
||||
Each buffered message stores:
|
||||
- **Message ID**: Unique identifier.
|
||||
- **Category**: External system call, notification, or cached database write.
|
||||
- **Target**: External system name, notification list name, or database connection name.
|
||||
- **Payload**: Serialized message content (API method + parameters, email subject + body, SQL + parameters).
|
||||
- **Tracked Operation ID**: For the cached-call categories, the `TrackedOperationId` linking the buffered message to its row in the operation tracking table. Not used by the notification category, which is tracked centrally via its `NotificationId`.
|
||||
- **Target**: External system name, the central cluster (for notifications), or database connection name.
|
||||
- **Payload**: Serialized message content (API method + parameters; notification list name + subject + body plus the locally generated `NotificationId` and source provenance; SQL + parameters).
|
||||
- **Retry Count**: Number of attempts so far.
|
||||
- **Created At**: Timestamp when the message was first queued.
|
||||
- **Last Attempt At**: Timestamp of the most recent delivery attempt.
|
||||
- **Status**: Pending, retrying, or parked.
|
||||
- **Status**: Pending, retrying, or parked. This is the **buffer message's** retry state, distinct from the operation's `TrackedOperationStatus` lifecycle in the operation tracking table. A buffer message exists only while a cached call is mid-retry, so it never carries the terminal `Delivered`, `Failed`, or `Discarded` states — those live solely on the tracking row.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **SQLite**: Local persistence on each node.
|
||||
- **Communication Layer**: Application-level replication to standby node; remote query handling from central.
|
||||
- **Communication Layer**: Application-level replication to standby node; remote query handling from central; carries buffered notifications to the central cluster (ClusterClient) and receives central's acks.
|
||||
- **External System Gateway**: Delivers external system API calls.
|
||||
- **Notification Service**: Delivers email notifications.
|
||||
- **Central–Site Communication**: The delivery target for the notification category — a buffered notification is forwarded to the central cluster over Central–Site Communication and cleared on central's ack. Also carries `CachedCallTelemetry` and reconciliation responses to central, and receives `RetryParkedOperation` / `DiscardParkedOperation` commands.
|
||||
- **Site Call Audit**: The central audit mirror for cached calls — receives this engine's cached-call telemetry and reconciliation responses, and relays operator Retry/Discard of parked cached calls back as commands.
|
||||
- **Database Connections**: Delivers cached database writes.
|
||||
- **Site Event Logging**: Logs store-and-forward activity (queued, delivered, retried, parked).
|
||||
|
||||
## Interactions
|
||||
|
||||
- **Site Runtime (Script Actors)**: Scripts submit messages to the buffer (external calls, notifications, cached DB writes).
|
||||
- **Communication Layer**: Handles parked message queries/commands from central.
|
||||
- **Health Monitoring**: Reports buffer depth metrics.
|
||||
- **Communication Layer**: Handles parked message queries/commands from central; carries buffered notifications to the central cluster.
|
||||
- **Notification Outbox**: The central destination for the notification category — central ingests each forwarded notification into the `Notifications` table and acks the site, on which the engine clears the buffered message.
|
||||
- **Site Call Audit**: The central observability sibling for the cached-call categories — this engine emits `CachedCallTelemetry` on every tracking-table transition, answers `CachedCallReconcileRequest` pulls, and executes the `RetryParkedOperation` / `DiscardParkedOperation` commands it relays.
|
||||
- **Health Monitoring**: Reports buffer depth metrics, including the notification backlog covering the site→central forward leg.
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
- **Pre-deployment validation**: Before any deployment is sent to a site, the central cluster performs comprehensive validation including flattening the configuration, test-compiling all scripts, verifying alarm trigger references, verifying script trigger references, and checking data connection binding completeness (see Section 3.11).
|
||||
|
||||
### 1.5 System-Wide Artifact Deployment
|
||||
- Changes to shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, and SMTP configuration are **not automatically propagated** to sites.
|
||||
- Changes to shared scripts, external system definitions, database connection definitions, and data connection definitions are **not automatically propagated** to sites. (Notification lists and SMTP configuration are central-only and are not deployed to sites — see Section 6.)
|
||||
- Deployment of system-wide artifacts requires **explicit action** by a user with the **Deployment** role.
|
||||
- Artifacts can be deployed to **all sites at once** or to an **individual site** (per-site deployment).
|
||||
- The Design role manages the definitions; the Deployment role triggers deployment to sites. A user may hold both roles.
|
||||
@@ -55,7 +55,7 @@
|
||||
|
||||
### 2.3 Site-Level Storage & Interface
|
||||
- Sites have **no user interface** — they are headless collectors, forwarders, and script executors.
|
||||
- Sites require local storage for: the current deployed (flattened) configurations, deployed scripts, shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, and SMTP configuration.
|
||||
- Sites require local storage for: the current deployed (flattened) configurations, deployed scripts, shared scripts, external system definitions, database connection definitions, and data connection definitions. (Notification lists and SMTP configuration are central-only and are not stored at sites — see Section 6.)
|
||||
- After artifact deployment, sites are **fully self-contained** — all runtime configuration is read from local SQLite. Sites do **not** access the central configuration database at runtime.
|
||||
- Store-and-forward buffers are persisted to a **local SQLite database on each node** and replicated between nodes via application-level replication (see 1.3).
|
||||
|
||||
@@ -231,7 +231,7 @@ Scripts executing on a site for a given instance can:
|
||||
- **Write** attribute values on that instance. For attributes with a data source reference, the write goes to the Data Connection Layer which writes to the physical device; the in-memory value updates when the device confirms the new value via the existing subscription. For static attributes, the write updates the in-memory value and **persists the override to local SQLite** — the value survives restart and failover. Persisted overrides are reset when the instance is redeployed.
|
||||
- **Call other scripts** on that instance via `Instance.CallScript("scriptName", params)`. Calls use the Akka ask pattern and return the called script's return value. Script-to-script calls support concurrent execution.
|
||||
- **Call shared scripts** via `Scripts.CallShared("scriptName", params)`. Shared scripts execute **inline** in the calling Script Actor's context — they are compiled code libraries, not separate actors.
|
||||
- **Call external system API methods** in two modes: `ExternalSystem.Call()` for synchronous request/response, or `ExternalSystem.CachedCall()` for fire-and-forget with store-and-forward on transient failure (see Section 5).
|
||||
- **Call external system API methods** in two modes: `ExternalSystem.Call()` for synchronous request/response, or `ExternalSystem.CachedCall()` for deferred delivery — it returns a `TrackedOperationId` tracking handle immediately and store-and-forwards the call on transient failure (see Section 5).
|
||||
- **Send notifications** (see Section 6).
|
||||
- **Access databases** by requesting an MS SQL client connection by name (see Section 5.5).
|
||||
|
||||
@@ -286,7 +286,7 @@ Scripts **cannot** access other instances' attributes or scripts.
|
||||
- Parked messages are **stored at the site** where they originated.
|
||||
- The **central UI** can **query sites** for parked messages and manage them remotely.
|
||||
- Operators can **retry** or **discard** parked messages from the central UI.
|
||||
- Parked message management covers **external system calls**, **notifications**, and **cached database writes**.
|
||||
- Parked message management covers **external system calls** and **cached database writes**. (Notifications are not parked at sites — they are store-and-forwarded to central, which owns delivery and parking; see Section 6.)
|
||||
|
||||
### 5.5 Database Connections
|
||||
- Database connections are **predefined, named resources** created by users with the **Design** role.
|
||||
@@ -306,23 +306,23 @@ Scripts can interact with databases in two modes:
|
||||
|
||||
### 6.1 Notification Lists
|
||||
- Notification lists are **system-wide**, managed by users with the **Design** role.
|
||||
- Each list has a **name** and contains one or more **recipients**.
|
||||
- Each recipient has a **name** and an **email address**.
|
||||
- Notification lists are deployed to **all sites** (deployment requires explicit action by a user with the Deployment role).
|
||||
- At the site, notification lists and recipients are read from **local SQLite** (populated by artifact deployment), not from the central config DB.
|
||||
- Each list has a **name** and a **`Type`** (`Email` now; `Teams` and other types planned). The type determines the type-specific targets the list carries.
|
||||
- An `Email` list contains one or more **recipients**, each with a **name** and an **email address**.
|
||||
- Notification lists are defined and stored **centrally only** — they are **not deployed to sites** and have no local SQLite copy. Recipient resolution happens at central, at delivery time.
|
||||
|
||||
### 6.2 Email Support
|
||||
- The system has **predefined support for sending email** as the notification delivery mechanism.
|
||||
- Email server configuration (SMTP settings) is defined centrally and deployed to all sites as part of **artifact deployment** (see Section 1.5). Sites read SMTP configuration from **local SQLite**.
|
||||
- Email server configuration (SMTP settings) is defined and stored **centrally only**. Sites never talk to SMTP; all delivery happens on the central cluster.
|
||||
|
||||
### 6.3 Script API
|
||||
- Scripts send notifications using a simplified API: `Notify.To("list name").Send("subject", "message")`
|
||||
- This API is available to instance scripts, alarm on-trigger scripts, and shared scripts.
|
||||
- `Send()` generates a `NotificationId` (GUID) locally and returns it to the script immediately; `Notify.Status(notificationId)` returns a status record (status, retry count, last error, key timestamps).
|
||||
|
||||
### 6.4 Store-and-Forward for Notifications
|
||||
- If the email server is unavailable, notifications are **buffered locally at the site**.
|
||||
- Follows the same retry pattern as external system calls: configurable **max retry count** and **time between retries** (fixed interval).
|
||||
- After max retries are exhausted, the notification is **parked** for manual review (managed via central UI alongside external system parked messages).
|
||||
### 6.4 Notification Delivery and Store-and-Forward
|
||||
- Notification delivery is **owned by the central cluster**, not by sites. A site script's notification is **store-and-forwarded to central** (target = central, not SMTP); the central **Notification Outbox** logs every notification to a `Notifications` audit table and owns dispatch, delivery, retry, and parking.
|
||||
- If the central cluster is unreachable, the notification is **buffered locally at the site** by the Store-and-Forward Engine and retried at a fixed forward interval until central acks it. The site→central forward does not park.
|
||||
- Once central holds the notification, delivery retry reuses the central SMTP configuration's **max retry count** and fixed **time between retries**. After retries are exhausted, or on a permanent failure, the notification is **parked** for review on the Central UI Notification Outbox page.
|
||||
- There is **no maximum buffer size** for notification messages.
|
||||
|
||||
## 7. Inbound API (Central)
|
||||
@@ -370,10 +370,11 @@ The central cluster hosts a **configuration and management UI** (no live machine
|
||||
- **Site & Data Connection Management**: Define sites (including optional NodeAAddress and NodeBAddress fields for Akka remoting paths, and optional GrpcNodeAAddress and GrpcNodeBAddress fields for gRPC streaming endpoints), manage data connections and assign them to sites.
|
||||
- **Area Management**: Define hierarchical area structures per site for organizing instances.
|
||||
- **Deployment**: View diffs between deployed and current template-derived configurations, deploy updates to individual instances. Filter instances by area. Pre-deployment validation runs automatically before any deployment is sent.
|
||||
- **System-Wide Artifact Deployment**: Explicitly deploy shared scripts, external system definitions, database connection definitions, data connection definitions, notification lists, and SMTP configuration to all sites or to an individual site (requires Deployment role). Per-site deployment is available via the Sites admin page.
|
||||
- **System-Wide Artifact Deployment**: Explicitly deploy shared scripts, external system definitions, database connection definitions, and data connection definitions to all sites or to an individual site (requires Deployment role). Per-site deployment is available via the Sites admin page. (Notification lists and SMTP configuration are central-only and are not deployed.)
|
||||
- **Deployment Status Monitoring**: Track whether deployments were successfully applied at site level.
|
||||
- **Debug View**: On-demand real-time view of a specific instance's tag values and alarm states for troubleshooting (see 8.1).
|
||||
- **Parked Message Management**: Query sites for parked messages (external system calls, notifications, and cached database writes), retry or discard them.
|
||||
- **Parked Message Management**: Query sites for parked messages (external system calls and cached database writes), retry or discard them.
|
||||
- **Notification Outbox**: Monitor centrally-delivered notifications — KPI tiles and a queryable `Notifications` audit list with Retry/Discard actions on parked notifications (see Section 6).
|
||||
- **Health Monitoring Dashboard**: View site cluster health, node status, data connection health, script error rates, alarm evaluation errors, and store-and-forward buffer depths (see Section 11).
|
||||
- **Site Event Log Viewer**: Query and view operational event logs from site clusters (see Section 12).
|
||||
|
||||
@@ -422,7 +423,7 @@ All system-modifying actions are logged, including:
|
||||
- **Alarm changes**: Create, edit, delete alarm definitions.
|
||||
- **Instance changes**: Create, override values, bind connections, area assignment, disable, enable, delete.
|
||||
- **Deployments**: Who deployed what to which instance, and the result (success/failure).
|
||||
- **System-wide artifact deployments**: Who deployed shared scripts / external system definitions / DB connections / data connections / notification lists / SMTP config, to which site(s), and the result.
|
||||
- **System-wide artifact deployments**: Who deployed shared scripts / external system definitions / DB connections / data connections, to which site(s), and the result.
|
||||
- **External system definition changes**: Create, edit, delete.
|
||||
- **Database connection changes**: Create, edit, delete.
|
||||
- **Notification list changes**: Create, edit, delete lists and recipients.
|
||||
@@ -448,7 +449,8 @@ The central cluster monitors the health of each site cluster, including:
|
||||
- **Data connection health**: Connected/disconnected status per data connection at the site.
|
||||
- **Script error rates**: Frequency of script failures at the site.
|
||||
- **Alarm evaluation errors**: Frequency of alarm evaluation failures at the site.
|
||||
- **Store-and-forward buffer depth**: Number of messages currently queued (broken down by external system calls, notifications, and cached database writes).
|
||||
- **Store-and-forward buffer depth**: Number of messages currently queued (broken down by external system calls, notifications, and cached database writes). The notification backlog covers the site→central forward leg.
|
||||
- **Notification Outbox KPIs**: Central-computed delivery-health metrics — queue depth, stuck count, and parked count — surfaced as headline tiles on the health dashboard. These are computed centrally from the `Notifications` table, not collected from sites.
|
||||
|
||||
### 11.2 Reporting
|
||||
- Site clusters **report health metrics to central** periodically.
|
||||
|
||||
@@ -24,9 +24,6 @@
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/admin/api-keys">API Keys</NavLink>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/admin/smtp">SMTP Configuration</NavLink>
|
||||
</li>
|
||||
</Authorized>
|
||||
</AuthorizeView>
|
||||
|
||||
@@ -65,6 +62,35 @@
|
||||
</Authorized>
|
||||
</AuthorizeView>
|
||||
|
||||
@* Notifications — mixed-role section; each item gated by its own policy.
|
||||
The header is ungated: every authenticated user holds at least one of
|
||||
Admin/Design/Deployment, so it always has a visible child. *@
|
||||
<div role="presentation" class="nav-section-header">Notifications</div>
|
||||
<AuthorizeView Policy="@AuthorizationPolicies.RequireAdmin">
|
||||
<Authorized Context="notifAdminContext">
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/notifications/smtp">SMTP Configuration</NavLink>
|
||||
</li>
|
||||
</Authorized>
|
||||
</AuthorizeView>
|
||||
<AuthorizeView Policy="@AuthorizationPolicies.RequireDesign">
|
||||
<Authorized Context="notifDesignContext">
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/notifications/lists">Notification Lists</NavLink>
|
||||
</li>
|
||||
</Authorized>
|
||||
</AuthorizeView>
|
||||
<AuthorizeView Policy="@AuthorizationPolicies.RequireDeployment">
|
||||
<Authorized Context="notifDeploymentContext">
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/notifications/report">Notification Report</NavLink>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<NavLink class="nav-link" href="/notifications/kpis">Notification KPIs</NavLink>
|
||||
</li>
|
||||
</Authorized>
|
||||
</AuthorizeView>
|
||||
|
||||
@* Monitoring — Health Dashboard is all-roles; Event Logs and
|
||||
Parked Messages are Deployment-role only (Component-CentralUI). *@
|
||||
<div role="presentation" class="nav-section-header">Monitoring</div>
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
@page "/design/external-systems"
|
||||
@using ScadaLink.Security
|
||||
@using ScadaLink.Commons.Entities.ExternalSystems
|
||||
@using ScadaLink.Commons.Entities.Notifications
|
||||
@using ScadaLink.Commons.Entities.InboundApi
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@attribute [Authorize(Policy = AuthorizationPolicies.RequireDesign)]
|
||||
@inject IExternalSystemRepository ExternalSystemRepository
|
||||
@inject INotificationRepository NotificationRepository
|
||||
@inject IInboundApiRepository InboundApiRepository
|
||||
@inject NavigationManager NavigationManager
|
||||
@inject IDialogService Dialog
|
||||
@@ -47,15 +45,6 @@
|
||||
Database Connections <span class="badge bg-secondary">@_dbConnections.Count</span>
|
||||
</button>
|
||||
</li>
|
||||
<li class="nav-item" role="presentation">
|
||||
<button class="nav-link @(_tab == "notif" ? "active" : "")"
|
||||
role="tab"
|
||||
aria-selected="@(_tab == "notif" ? "true" : "false")"
|
||||
aria-controls="int-tab-notif"
|
||||
@onclick='() => _tab = "notif"'>
|
||||
Notification Lists <span class="badge bg-secondary">@_notificationLists.Count</span>
|
||||
</button>
|
||||
</li>
|
||||
<li class="nav-item" role="presentation">
|
||||
<button class="nav-link @(_tab == "inbound" ? "active" : "")"
|
||||
role="tab"
|
||||
@@ -75,10 +64,6 @@
|
||||
{
|
||||
<div role="tabpanel" id="int-tab-dbconn">@RenderDbConnections()</div>
|
||||
}
|
||||
else if (_tab == "notif")
|
||||
{
|
||||
<div role="tabpanel" id="int-tab-notif">@RenderNotificationLists()</div>
|
||||
}
|
||||
else if (_tab == "inbound")
|
||||
{
|
||||
<div role="tabpanel" id="int-tab-inbound">@RenderInboundApiMethods()</div>
|
||||
@@ -107,15 +92,6 @@
|
||||
? _dbConnections
|
||||
: _dbConnections.Where(dc => dc.Name?.Contains(_dbConnSearch, StringComparison.OrdinalIgnoreCase) ?? false);
|
||||
|
||||
// Notification Lists
|
||||
private List<NotificationList> _notificationLists = new();
|
||||
private Dictionary<int, List<NotificationRecipient>> _recipients = new();
|
||||
private string _notifSearch = "";
|
||||
private IEnumerable<NotificationList> FilteredNotificationLists =>
|
||||
string.IsNullOrWhiteSpace(_notifSearch)
|
||||
? _notificationLists
|
||||
: _notificationLists.Where(n => n.Name?.Contains(_notifSearch, StringComparison.OrdinalIgnoreCase) ?? false);
|
||||
|
||||
// Inbound API Methods
|
||||
private List<ApiMethod> _apiMethods = new();
|
||||
private string _apiMethodSearch = "";
|
||||
@@ -138,15 +114,6 @@
|
||||
{
|
||||
_externalSystems = (await ExternalSystemRepository.GetAllExternalSystemsAsync()).ToList();
|
||||
_dbConnections = (await ExternalSystemRepository.GetAllDatabaseConnectionsAsync()).ToList();
|
||||
_notificationLists = (await NotificationRepository.GetAllNotificationListsAsync()).ToList();
|
||||
|
||||
_recipients.Clear();
|
||||
foreach (var list in _notificationLists)
|
||||
{
|
||||
var recips = await NotificationRepository.GetRecipientsByListIdAsync(list.Id);
|
||||
if (recips.Count > 0) _recipients[list.Id] = recips.ToList();
|
||||
}
|
||||
|
||||
_apiMethods = (await InboundApiRepository.GetAllApiMethodsAsync()).ToList();
|
||||
}
|
||||
catch (Exception ex) { _errorMessage = ex.Message; }
|
||||
@@ -298,87 +265,6 @@
|
||||
catch (Exception ex) { _toast.ShowError(ex.Message); }
|
||||
}
|
||||
|
||||
// ==== Notification Lists ====
|
||||
private RenderFragment RenderNotificationLists() => __builder =>
|
||||
{
|
||||
<div class="d-flex justify-content-between align-items-center mb-2">
|
||||
<h5 class="mb-0">Notification Lists</h5>
|
||||
<button class="btn btn-primary btn-sm" @onclick='() => NavigationManager.NavigateTo("/design/notification-lists/create")'>Add Notification List</button>
|
||||
</div>
|
||||
|
||||
@if (_notificationLists.Count == 0)
|
||||
{
|
||||
<div class="text-center py-5 text-muted">
|
||||
<p class="mb-3">No notification lists configured.</p>
|
||||
<button class="btn btn-primary btn-sm"
|
||||
@onclick='() => NavigationManager.NavigateTo("/design/notification-lists/create")'>
|
||||
Add your first notification list
|
||||
</button>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="mb-3" style="max-width: 320px;">
|
||||
<input class="form-control form-control-sm"
|
||||
placeholder="Filter by name…"
|
||||
@bind="_notifSearch" @bind:event="oninput" />
|
||||
</div>
|
||||
|
||||
@if (!FilteredNotificationLists.Any())
|
||||
{
|
||||
<p class="text-muted small">No notification lists match the filter.</p>
|
||||
}
|
||||
|
||||
<div class="row g-3">
|
||||
@foreach (var list in FilteredNotificationLists)
|
||||
{
|
||||
var recips = _recipients.GetValueOrDefault(list.Id);
|
||||
<div class="col-lg-6 col-12" @key="list.Id">
|
||||
<div class="card h-100">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start mb-2">
|
||||
<h5 class="card-title mb-0">@list.Name</h5>
|
||||
<div class="d-flex gap-1">
|
||||
<button class="btn btn-outline-primary btn-sm" @onclick='() => NavigationManager.NavigateTo($"/design/notification-lists/{list.Id}/edit")'>Edit</button>
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-outline-secondary btn-sm"
|
||||
data-bs-toggle="dropdown"
|
||||
aria-expanded="false"
|
||||
aria-label="@($"More actions for {list.Name}")">⋮</button>
|
||||
<ul class="dropdown-menu dropdown-menu-end">
|
||||
<li><button class="dropdown-item text-danger" @onclick="() => DeleteNotifList(list)">Delete</button></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@if (recips == null || recips.Count == 0)
|
||||
{
|
||||
<p class="text-muted small fst-italic mb-0">No recipients.</p>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div>
|
||||
@foreach (var r in recips)
|
||||
{
|
||||
<span class="badge bg-light text-dark me-1 mb-1">@r.Name <@r.EmailAddress></span>
|
||||
}
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
}
|
||||
};
|
||||
|
||||
private async Task DeleteNotifList(NotificationList list)
|
||||
{
|
||||
if (!await Dialog.ConfirmAsync("Delete", $"Delete notification list '{list.Name}'?", danger: true)) return;
|
||||
try { await NotificationRepository.DeleteNotificationListAsync(list.Id); await NotificationRepository.SaveChangesAsync(); _toast.ShowSuccess("Deleted."); await LoadAllAsync(); }
|
||||
catch (Exception ex) { _toast.ShowError(ex.Message); }
|
||||
}
|
||||
|
||||
// ==== Inbound API Methods ====
|
||||
private RenderFragment RenderInboundApiMethods() => __builder =>
|
||||
{
|
||||
|
||||
@@ -4,9 +4,12 @@
|
||||
@using ScadaLink.Commons.Entities.Sites
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@using ScadaLink.HealthMonitoring
|
||||
@using ScadaLink.Commons.Messages.Notification
|
||||
@using ScadaLink.Communication
|
||||
@implements IDisposable
|
||||
@inject ICentralHealthAggregator HealthAggregator
|
||||
@inject ISiteRepository SiteRepository
|
||||
@inject CommunicationService CommunicationService
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
@@ -17,6 +20,42 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@* Notification Outbox headline KPIs — a central concern, shown regardless of site reports *@
|
||||
<div class="d-flex justify-content-between align-items-center mb-2">
|
||||
<h6 class="text-muted mb-0">Notification Outbox</h6>
|
||||
<a class="small" href="/notifications/kpis">View details →</a>
|
||||
</div>
|
||||
<div class="row g-3 mb-3">
|
||||
<div class="col-lg-4 col-md-6 col-12">
|
||||
<div class="card h-100">
|
||||
<div class="card-body text-center">
|
||||
<h3 class="mb-0">@OutboxTileValue(_outboxKpi.QueueDepth)</h3>
|
||||
<small class="text-muted">Queue Depth</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-4 col-md-6 col-12">
|
||||
<div class="card h-100 @(_outboxKpiAvailable && _outboxKpi.StuckCount > 0 ? "border-warning" : "")">
|
||||
<div class="card-body text-center">
|
||||
<h3 class="mb-0 @(_outboxKpiAvailable && _outboxKpi.StuckCount > 0 ? "text-warning" : "")">@OutboxTileValue(_outboxKpi.StuckCount)</h3>
|
||||
<small class="text-muted">Stuck</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg-4 col-md-6 col-12">
|
||||
<div class="card h-100 @(_outboxKpiAvailable && _outboxKpi.ParkedCount > 0 ? "border-danger" : "")">
|
||||
<div class="card-body text-center">
|
||||
<h3 class="mb-0 @(_outboxKpiAvailable && _outboxKpi.ParkedCount > 0 ? "text-danger" : "")">@OutboxTileValue(_outboxKpi.ParkedCount)</h3>
|
||||
<small class="text-muted">Parked</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@if (!_outboxKpiAvailable && _outboxKpiError != null)
|
||||
{
|
||||
<div class="text-muted small mb-3">Notification Outbox KPIs unavailable: @_outboxKpiError</div>
|
||||
}
|
||||
|
||||
@if (_siteStates.Count == 0)
|
||||
{
|
||||
<div class="alert alert-info">No site health reports received yet.</div>
|
||||
@@ -294,6 +333,20 @@
|
||||
private Timer? _refreshTimer;
|
||||
private int _autoRefreshSeconds = 10;
|
||||
|
||||
// Notification Outbox headline KPIs, refreshed alongside the site states.
|
||||
private NotificationKpiResponse _outboxKpi =
|
||||
new(
|
||||
CorrelationId: string.Empty,
|
||||
Success: false,
|
||||
ErrorMessage: null,
|
||||
QueueDepth: 0,
|
||||
StuckCount: 0,
|
||||
ParkedCount: 0,
|
||||
DeliveredLastInterval: 0,
|
||||
OldestPendingAge: null);
|
||||
private bool _outboxKpiAvailable;
|
||||
private string? _outboxKpiError;
|
||||
|
||||
private static bool SiteHasActiveErrors(SiteHealthState state)
|
||||
{
|
||||
var report = state.LatestReport;
|
||||
@@ -316,22 +369,53 @@
|
||||
// Non-fatal — fall back to showing siteId only
|
||||
}
|
||||
|
||||
RefreshNow();
|
||||
await RefreshNow();
|
||||
_refreshTimer = new Timer(_ =>
|
||||
{
|
||||
InvokeAsync(() =>
|
||||
InvokeAsync(async () =>
|
||||
{
|
||||
RefreshNow();
|
||||
await RefreshNow();
|
||||
StateHasChanged();
|
||||
});
|
||||
}, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds));
|
||||
}
|
||||
|
||||
private void RefreshNow()
|
||||
private async Task RefreshNow()
|
||||
{
|
||||
_siteStates = HealthAggregator.GetAllSiteStates();
|
||||
await LoadOutboxKpis();
|
||||
}
|
||||
|
||||
/// <summary>
/// Queries the notification KPIs through <c>CommunicationService</c> and updates the
/// outbox tile state. On success the response is stored and the tiles are marked
/// available; on a failed response or an exception the tiles are marked unavailable
/// and an error message is recorded (the previously stored KPI values are left as-is).
/// </summary>
private async Task LoadOutboxKpis()
{
    try
    {
        // Each query carries a fresh correlation id.
        var request = new NotificationKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetNotificationKpisAsync(request);

        if (!reply.Success)
        {
            _outboxKpiAvailable = false;
            _outboxKpiError = reply.ErrorMessage ?? "KPI query failed.";
            return;
        }

        _outboxKpi = reply;
        _outboxKpiAvailable = true;
        _outboxKpiError = null;
    }
    catch (Exception ex)
    {
        // Deliberately swallowed: a failed KPI query is reported via the error
        // field instead of propagating to the caller.
        _outboxKpiAvailable = false;
        _outboxKpiError = $"KPI query failed: {ex.Message}";
    }
}
|
||||
|
||||
/// <summary>
/// Text for an outbox KPI tile: the number itself while the KPI data is
/// available, otherwise an em dash placeholder.
/// </summary>
/// <param name="value">The KPI count to display.</param>
/// <returns>The formatted count, or "—" when the outbox KPI query failed.</returns>
private string OutboxTileValue(int value)
{
    if (!_outboxKpiAvailable)
    {
        return "—";
    }

    return value.ToString();
}
|
||||
|
||||
private string GetSiteName(string siteId)
|
||||
{
|
||||
return _siteNames.GetValueOrDefault(siteId, siteId);
|
||||
|
||||
@@ -0,0 +1,209 @@
|
||||
@page "/notifications/kpis"
|
||||
@attribute [Authorize(Policy = ScadaLink.Security.AuthorizationPolicies.RequireDeployment)]
|
||||
@using ScadaLink.Commons.Entities.Sites
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@using ScadaLink.Commons.Messages.Notification
|
||||
@using ScadaLink.Commons.Types.Notifications
|
||||
@using ScadaLink.Communication
|
||||
@inject CommunicationService CommunicationService
|
||||
@inject ISiteRepository SiteRepository
|
||||
@inject ILogger<NotificationKpis> Logger
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
<h4 class="mb-0">Notification KPIs</h4>
|
||||
<button class="btn btn-outline-secondary btn-sm" @onclick="RefreshAll" disabled="@_loading">
|
||||
@if (_loading) { <span class="spinner-border spinner-border-sm me-1" role="status"></span> }
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@* ── Global KPI tiles ── *@
|
||||
@if (_kpiError != null)
|
||||
{
|
||||
<div class="alert alert-warning py-2">KPIs unavailable: @_kpiError</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="row g-3 mb-4">
|
||||
<div class="col-lg col-md-4 col-6">
|
||||
<div class="card h-100">
|
||||
<div class="card-body text-center py-3">
|
||||
<h3 class="mb-0">@_kpi.QueueDepth</h3>
|
||||
<small class="text-muted">Queue Depth</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg col-md-4 col-6">
|
||||
<div class="card h-100 @(_kpi.StuckCount > 0 ? "border-warning" : "")">
|
||||
<div class="card-body text-center py-3">
|
||||
<h3 class="mb-0 @(_kpi.StuckCount > 0 ? "text-warning" : "")">@_kpi.StuckCount</h3>
|
||||
<small class="text-muted">Stuck</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg col-md-4 col-6">
|
||||
<div class="card h-100 @(_kpi.ParkedCount > 0 ? "border-danger" : "")">
|
||||
<div class="card-body text-center py-3">
|
||||
<h3 class="mb-0 @(_kpi.ParkedCount > 0 ? "text-danger" : "")">@_kpi.ParkedCount</h3>
|
||||
<small class="text-muted">Parked</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg col-md-4 col-6">
|
||||
<div class="card h-100">
|
||||
<div class="card-body text-center py-3">
|
||||
<h3 class="mb-0 text-success">@_kpi.DeliveredLastInterval</h3>
|
||||
<small class="text-muted">Delivered (last interval)</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-lg col-md-4 col-6">
|
||||
<div class="card h-100">
|
||||
<div class="card-body text-center py-3">
|
||||
<h3 class="mb-0">@FormatAge(_kpi.OldestPendingAge)</h3>
|
||||
<small class="text-muted">Oldest Pending Age</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
|
||||
@* ── Per-site breakdown ── *@
|
||||
<h5 class="mb-2">Per-site breakdown</h5>
|
||||
@if (_perSiteError != null)
|
||||
{
|
||||
<div class="alert alert-warning py-2">Per-site KPIs unavailable: @_perSiteError</div>
|
||||
}
|
||||
else if (_perSite.Count == 0)
|
||||
{
|
||||
<div class="card">
|
||||
<div class="card-body text-center text-muted py-4">
|
||||
<div class="small">No per-site activity.</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="table-responsive">
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th>Site</th>
|
||||
<th class="text-end">Queue Depth</th>
|
||||
<th class="text-end">Stuck</th>
|
||||
<th class="text-end">Parked</th>
|
||||
<th class="text-end">Delivered (last interval)</th>
|
||||
<th class="text-end">Oldest Pending Age</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var s in _perSite)
|
||||
{
|
||||
<tr @key="s.SourceSiteId" class="@(s.StuckCount > 0 ? "table-warning" : "")">
|
||||
<td>@SiteName(s.SourceSiteId)</td>
|
||||
<td class="text-end font-monospace">@s.QueueDepth</td>
|
||||
<td class="text-end font-monospace @(s.StuckCount > 0 ? "text-warning" : "")">@s.StuckCount</td>
|
||||
<td class="text-end font-monospace @(s.ParkedCount > 0 ? "text-danger" : "")">@s.ParkedCount</td>
|
||||
<td class="text-end font-monospace text-success">@s.DeliveredLastInterval</td>
|
||||
<td class="text-end font-monospace">@FormatAge(s.OldestPendingAge)</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
|
||||
@code {
|
||||
private List<Site> _sites = new();
|
||||
|
||||
private NotificationKpiResponse _kpi = new(string.Empty, true, null, 0, 0, 0, 0, null);
|
||||
private string? _kpiError;
|
||||
|
||||
private IReadOnlyList<SiteNotificationKpiSnapshot> _perSite = Array.Empty<SiteNotificationKpiSnapshot>();
|
||||
private string? _perSiteError;
|
||||
|
||||
private bool _loading;
|
||||
|
||||
/// <summary>
/// Loads the site list used to resolve display names, then performs the
/// initial KPI refresh.
/// </summary>
protected override async Task OnInitializedAsync()
{
    try
    {
        var allSites = await SiteRepository.GetAllSitesAsync();
        _sites = allSites.ToList();
    }
    catch (Exception ex)
    {
        // Not fatal: without the site list the per-site table simply shows
        // the raw site identifiers.
        Logger.LogWarning(ex, "Failed to load sites for the KPI per-site breakdown.");
    }

    await RefreshAll();
}
|
||||
|
||||
/// <summary>
/// Reloads the global KPI tiles and the per-site breakdown concurrently,
/// keeping the loading flag set for the duration.
/// </summary>
private async Task RefreshAll()
{
    _loading = true;

    // Both loaders write component fields, but this is race-free: Blazor Server runs
    // every continuation on the circuit's single-threaded synchronization context.
    var globalLoad = LoadGlobalKpis();
    var perSiteLoad = LoadPerSiteKpis();
    await Task.WhenAll(globalLoad, perSiteLoad);

    _loading = false;
}
|
||||
|
||||
/// <summary>
/// Fetches the global notification KPIs. A successful response replaces the
/// displayed KPI values and clears the error; a failed response or an exception
/// only records an error message.
/// </summary>
private async Task LoadGlobalKpis()
{
    try
    {
        var request = new NotificationKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetNotificationKpisAsync(request);

        if (!reply.Success)
        {
            _kpiError = reply.ErrorMessage ?? "KPI query failed.";
            return;
        }

        _kpi = reply;
        _kpiError = null;
    }
    catch (Exception ex)
    {
        // Surface the failure in the page banner rather than throwing.
        _kpiError = $"KPI query failed: {ex.Message}";
    }
}
|
||||
|
||||
/// <summary>
/// Fetches the per-site notification KPI snapshots. A successful response replaces
/// the displayed rows and clears the error; a failed response or an exception only
/// records an error message.
/// </summary>
private async Task LoadPerSiteKpis()
{
    try
    {
        var request = new PerSiteNotificationKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetPerSiteNotificationKpisAsync(request);

        if (!reply.Success)
        {
            _perSiteError = reply.ErrorMessage ?? "Per-site KPI query failed.";
            return;
        }

        _perSite = reply.Sites;
        _perSiteError = null;
    }
    catch (Exception ex)
    {
        // Surface the failure in the page banner rather than throwing.
        _perSiteError = $"Per-site KPI query failed: {ex.Message}";
    }
}
|
||||
|
||||
/// <summary>
/// Resolves a site identifier to the name of the first loaded site with that
/// identifier, falling back to the identifier itself when no site matches or
/// the matching site has no name.
/// </summary>
/// <param name="siteId">The site identifier to look up.</param>
/// <returns>The site's name, or <paramref name="siteId"/> as a fallback.</returns>
private string SiteName(string siteId)
{
    foreach (var candidate in _sites)
    {
        if (candidate.SiteIdentifier == siteId)
        {
            return candidate.Name ?? siteId;
        }
    }

    return siteId;
}
|
||||
|
||||
/// <summary>
/// Formats an age as a compact label in a single unit — seconds, minutes,
/// hours or days — truncating any fractional part.
/// </summary>
/// <param name="age">The age to format; <c>null</c> means no value is available.</param>
/// <returns>
/// An em dash for <c>null</c>; otherwise a label such as <c>42s</c>, <c>5m</c>,
/// <c>3h</c> or <c>2d</c>. Negative ages are shown as <c>0s</c>.
/// </returns>
private static string FormatAge(TimeSpan? age)
{
    if (age is null) return "—";

    // A negative age would pass the "< 60 seconds" test below and be rendered
    // as a large negative second count (e.g. "-7200s"); clamp it to zero.
    var t = age.Value < TimeSpan.Zero ? TimeSpan.Zero : age.Value;

    if (t.TotalSeconds < 60) return $"{(int)t.TotalSeconds}s";
    if (t.TotalMinutes < 60) return $"{(int)t.TotalMinutes}m";
    if (t.TotalHours < 24) return $"{(int)t.TotalHours}h";
    return $"{(int)t.TotalDays}d";
}
|
||||
}
|
||||
+4
-4
@@ -1,5 +1,5 @@
|
||||
@page "/design/notification-lists/create"
|
||||
@page "/design/notification-lists/{Id:int}/edit"
|
||||
@page "/notifications/lists/create"
|
||||
@page "/notifications/lists/{Id:int}/edit"
|
||||
@using ScadaLink.Security
|
||||
@using ScadaLink.Commons.Entities.Notifications
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@@ -147,7 +147,7 @@
|
||||
await NotificationRepository.AddNotificationListAsync(nl);
|
||||
}
|
||||
await NotificationRepository.SaveChangesAsync();
|
||||
NavigationManager.NavigateTo("/design/external-systems");
|
||||
NavigationManager.NavigateTo("/notifications/lists");
|
||||
}
|
||||
catch (Exception ex) { _formError = ex.Message; }
|
||||
}
|
||||
@@ -186,5 +186,5 @@
|
||||
catch (Exception ex) { _recipientFormError = ex.Message; }
|
||||
}
|
||||
|
||||
private void GoBack() => NavigationManager.NavigateTo("/design/external-systems");
|
||||
private void GoBack() => NavigationManager.NavigateTo("/notifications/lists");
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
@page "/notifications/lists"
|
||||
@using ScadaLink.Security
|
||||
@using ScadaLink.Commons.Entities.Notifications
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@attribute [Authorize(Policy = AuthorizationPolicies.RequireDesign)]
|
||||
@inject INotificationRepository NotificationRepository
|
||||
@inject NavigationManager NavigationManager
|
||||
@inject IDialogService Dialog
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<ToastNotification @ref="_toast" />
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
<h4 class="mb-0">Notification Lists</h4>
|
||||
<button class="btn btn-primary btn-sm"
|
||||
@onclick='() => NavigationManager.NavigateTo("/notifications/lists/create")'>
|
||||
Add Notification List
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@if (_loading)
|
||||
{
|
||||
<LoadingSpinner IsLoading="true" />
|
||||
}
|
||||
else if (_errorMessage != null)
|
||||
{
|
||||
<div class="alert alert-danger">@_errorMessage</div>
|
||||
}
|
||||
else if (_lists.Count == 0)
|
||||
{
|
||||
<div class="card">
|
||||
<div class="card-body text-center text-muted py-5">
|
||||
<div class="fs-5 mb-2">No notification lists</div>
|
||||
<button class="btn btn-primary btn-sm"
|
||||
@onclick='() => NavigationManager.NavigateTo("/notifications/lists/create")'>
|
||||
Add your first notification list
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="table-responsive">
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Recipients</th>
|
||||
<th class="text-end">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var list in _lists)
|
||||
{
|
||||
var recipients = _recipients.GetValueOrDefault(list.Id)
|
||||
?? (IReadOnlyList<NotificationRecipient>)Array.Empty<NotificationRecipient>();
|
||||
<tr @key="list.Id">
|
||||
<td>@list.Name</td>
|
||||
<td>
|
||||
@if (recipients.Count == 0)
|
||||
{
|
||||
<span class="text-muted small fst-italic">No recipients</span>
|
||||
}
|
||||
else
|
||||
{
|
||||
@foreach (var r in recipients)
|
||||
{
|
||||
<span class="badge bg-light text-dark me-1 mb-1">@r.Name <@r.EmailAddress></span>
|
||||
}
|
||||
}
|
||||
</td>
|
||||
<td class="text-end">
|
||||
<button class="btn btn-outline-primary btn-sm me-1"
|
||||
@onclick='() => NavigationManager.NavigateTo($"/notifications/lists/{list.Id}/edit")'>
|
||||
Edit
|
||||
</button>
|
||||
<button class="btn btn-outline-danger btn-sm"
|
||||
@onclick="() => DeleteList(list)">
|
||||
Delete
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
|
||||
@code {
|
||||
private bool _loading = true;
|
||||
private string? _errorMessage;
|
||||
private List<NotificationList> _lists = new();
|
||||
private readonly Dictionary<int, IReadOnlyList<NotificationRecipient>> _recipients = new();
|
||||
private ToastNotification _toast = default!;
|
||||
|
||||
protected override async Task OnInitializedAsync() => await LoadAsync();
|
||||
|
||||
/// <summary>
/// Reloads all notification lists and, for each list, its recipients. Any
/// failure is captured in the page error message instead of being thrown.
/// </summary>
private async Task LoadAsync()
{
    _errorMessage = null;
    _loading = true;

    try
    {
        var allLists = await NotificationRepository.GetAllNotificationListsAsync();
        _lists = allLists.ToList();

        // Rebuild the recipient lookup from scratch, one query per list.
        _recipients.Clear();
        foreach (var entry in _lists)
        {
            var members = await NotificationRepository.GetRecipientsByListIdAsync(entry.Id);
            _recipients[entry.Id] = members;
        }
    }
    catch (Exception ex)
    {
        _errorMessage = $"Failed to load notification lists: {ex.Message}";
    }

    _loading = false;
}
|
||||
|
||||
/// <summary>
/// Asks the user to confirm, then deletes the given notification list, saves the
/// change and reloads the page data. Success and failure are reported via toasts.
/// </summary>
/// <param name="list">The notification list to delete.</param>
private async Task DeleteList(NotificationList list)
{
    var confirmed = await Dialog.ConfirmAsync(
        "Delete", $"Delete notification list '{list.Name}'?", danger: true);
    if (!confirmed) return;

    try
    {
        await NotificationRepository.DeleteNotificationListAsync(list.Id);
        await NotificationRepository.SaveChangesAsync();

        _toast.ShowSuccess("Deleted.");
        await LoadAsync();
    }
    catch (Exception ex)
    {
        _toast.ShowError($"Failed to delete notification list: {ex.Message}");
    }
}
|
||||
}
|
||||
@@ -0,0 +1,395 @@
|
||||
@page "/notifications/report"
|
||||
@attribute [Authorize(Policy = ScadaLink.Security.AuthorizationPolicies.RequireDeployment)]
|
||||
@using ScadaLink.Commons.Entities.Sites
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@using ScadaLink.Commons.Messages.Notification
|
||||
@using ScadaLink.Communication
|
||||
@inject CommunicationService CommunicationService
|
||||
@inject ISiteRepository SiteRepository
|
||||
@inject IDialogService Dialog
|
||||
@inject ILogger<NotificationReport> Logger
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<ToastNotification @ref="_toast" />
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
<h4 class="mb-0">Notification Report</h4>
|
||||
<button class="btn btn-outline-secondary btn-sm" @onclick="RefreshAll" disabled="@_loading">
|
||||
@if (_loading) { <span class="spinner-border spinner-border-sm me-1" role="status"></span> }
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@* ── Filters ── *@
|
||||
<div class="card mb-3">
|
||||
<div class="card-body py-2">
|
||||
<div class="row g-2 align-items-end">
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-status">Status</label>
|
||||
<select id="no-status" class="form-select form-select-sm" style="min-width: 130px;"
|
||||
@bind="_statusFilter">
|
||||
<option value="">All</option>
|
||||
<option value="Pending">Pending</option>
|
||||
<option value="Retrying">Retrying</option>
|
||||
<option value="Delivered">Delivered</option>
|
||||
<option value="Parked">Parked</option>
|
||||
<option value="Discarded">Discarded</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-type">Type</label>
|
||||
<select id="no-type" class="form-select form-select-sm" style="min-width: 120px;"
|
||||
@bind="_typeFilter">
|
||||
<option value="">All</option>
|
||||
<option value="Email">Email</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-site">Source site</label>
|
||||
<select id="no-site" class="form-select form-select-sm" style="min-width: 150px;"
|
||||
@bind="_siteFilter">
|
||||
<option value="">Any</option>
|
||||
@foreach (var site in _sites)
|
||||
{
|
||||
<option value="@site.SiteIdentifier">@site.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-list">List name</label>
|
||||
<input id="no-list" type="text" class="form-control form-control-sm"
|
||||
style="min-width: 140px;" placeholder="Any" @bind="_listFilter" />
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-from">From</label>
|
||||
<input id="no-from" type="datetime-local" class="form-control form-control-sm"
|
||||
@bind="_fromFilter" />
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<label class="form-label small mb-1" for="no-to">To</label>
|
||||
<input id="no-to" type="datetime-local" class="form-control form-control-sm"
|
||||
@bind="_toFilter" />
|
||||
</div>
|
||||
<div class="col">
|
||||
<label class="form-label small mb-1" for="no-search">Subject keyword</label>
|
||||
<input id="no-search" type="search" class="form-control form-control-sm"
|
||||
placeholder="Search subject…" @bind="_subjectFilter" />
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<div class="form-check mb-1">
|
||||
<input class="form-check-input" type="checkbox" id="no-stuck-only"
|
||||
@bind="_stuckOnly" />
|
||||
<label class="form-check-label small" for="no-stuck-only">Stuck only</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<button class="btn btn-outline-secondary btn-sm" @onclick="ClearFilters"
|
||||
disabled="@(!HasActiveFilters)">Clear</button>
|
||||
</div>
|
||||
<div class="col-auto">
|
||||
<button class="btn btn-primary btn-sm" @onclick="Search" disabled="@_loading">
|
||||
@if (_loading) { <span class="spinner-border spinner-border-sm me-1" role="status"></span> }
|
||||
Query
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@if (_listError != null)
|
||||
{
|
||||
<div class="alert alert-danger">@_listError</div>
|
||||
}
|
||||
|
||||
@* ── Notification list ── *@
|
||||
@if (_notifications == null)
|
||||
{
|
||||
@if (_loading)
|
||||
{
|
||||
<div class="text-muted small">Loading…</div>
|
||||
}
|
||||
}
|
||||
else if (_notifications.Count == 0)
|
||||
{
|
||||
<div class="card">
|
||||
<div class="card-body text-center text-muted py-5">
|
||||
<div class="fs-5 mb-1">No notifications</div>
|
||||
<div class="small">No notifications match the current filters.</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="table-responsive">
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Type</th>
|
||||
<th>List</th>
|
||||
<th>Subject</th>
|
||||
<th>Status</th>
|
||||
<th class="text-end">Retries</th>
|
||||
<th>Source site</th>
|
||||
<th>Created</th>
|
||||
<th>Delivered</th>
|
||||
<th class="text-end">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var n in _notifications)
|
||||
{
|
||||
<tr @key="n.NotificationId" class="@(n.IsStuck ? "table-warning" : "")">
|
||||
<td><code class="small" title="@n.NotificationId">@ShortId(n.NotificationId)</code></td>
|
||||
<td>@n.Type</td>
|
||||
<td>@n.ListName</td>
|
||||
<td>
|
||||
@n.Subject
|
||||
@if (!string.IsNullOrEmpty(n.LastError))
|
||||
{
|
||||
<div class="small text-danger text-truncate" style="max-width: 320px;"
|
||||
title="@n.LastError">@n.LastError</div>
|
||||
}
|
||||
</td>
|
||||
<td>
|
||||
<span class="badge @StatusBadgeClass(n.Status)">@n.Status</span>
|
||||
@if (n.IsStuck)
|
||||
{
|
||||
<span class="badge bg-warning text-dark ms-1">Stuck</span>
|
||||
}
|
||||
</td>
|
||||
<td class="text-end font-monospace">@n.RetryCount</td>
|
||||
<td><span class="small">@SiteName(n.SourceSiteId)</span></td>
|
||||
<td><TimestampDisplay Value="@n.CreatedAt" Format="yyyy-MM-dd HH:mm" /></td>
|
||||
<td><TimestampDisplay Value="@n.DeliveredAt" Format="yyyy-MM-dd HH:mm" NullText="—" /></td>
|
||||
<td class="text-end">
|
||||
@if (n.Status == "Parked")
|
||||
{
|
||||
<button class="btn btn-outline-success btn-sm me-1"
|
||||
@onclick="() => RetryNotification(n)" disabled="@_actionInProgress">
|
||||
Retry
|
||||
</button>
|
||||
<button class="btn btn-outline-danger btn-sm"
|
||||
@onclick="() => DiscardNotification(n)" disabled="@_actionInProgress">
|
||||
Discard
|
||||
</button>
|
||||
}
|
||||
else
|
||||
{
|
||||
<span class="text-muted small">—</span>
|
||||
}
|
||||
</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
@if (_totalCount > _pageSize)
{
    @* Ceiling division: number of pages needed to show _totalCount rows. *@
    var totalPages = (_totalCount + _pageSize - 1) / _pageSize;

    <div class="d-flex justify-content-between align-items-center">
        <span class="text-muted small">
            Page @_pageNumber of @totalPages · @_totalCount total
        </span>
        <div>
            <button class="btn btn-outline-secondary btn-sm me-1"
                    @onclick="PrevPage" disabled="@(_pageNumber <= 1 || _loading)">Previous</button>
            @* Disable Next from the page count, not the row count: a full final page
               (total is an exact multiple of the page size) has no successor. *@
            <button class="btn btn-outline-secondary btn-sm"
                    @onclick="NextPage" disabled="@(_pageNumber >= totalPages || _loading)">Next</button>
        </div>
    </div>
}
|
||||
}
|
||||
</div>
|
||||
|
||||
@code {
|
||||
private const int _pageSize = 50;
|
||||
|
||||
private ToastNotification _toast = default!;
|
||||
private List<Site> _sites = new();
|
||||
|
||||
// List
|
||||
private List<NotificationSummary>? _notifications;
|
||||
private int _totalCount;
|
||||
private int _pageNumber = 1;
|
||||
private bool _loading;
|
||||
private string? _listError;
|
||||
private bool _actionInProgress;
|
||||
|
||||
// Filters
|
||||
private string _statusFilter = string.Empty;
|
||||
private string _typeFilter = string.Empty;
|
||||
private string _siteFilter = string.Empty;
|
||||
private string _listFilter = string.Empty;
|
||||
private string _subjectFilter = string.Empty;
|
||||
private bool _stuckOnly;
|
||||
private DateTime? _fromFilter;
|
||||
private DateTime? _toFilter;
|
||||
|
||||
/// <summary>
/// Loads the site list used to resolve source-site names, then loads the current
/// page of notifications. A failure to load sites is logged and otherwise ignored.
/// </summary>
protected override async Task OnInitializedAsync()
{
    try
    {
        var allSites = await SiteRepository.GetAllSitesAsync();
        _sites = allSites.ToList();
    }
    catch (Exception ex)
    {
        // Non-fatal — source-site filter just falls back to the raw site IDs.
        Logger.LogWarning(ex, "Failed to load sites for the report source-site filter.");
    }

    await RefreshAll();
}
|
||||
|
||||
/// <summary>Reloads everything the page shows; currently that is just the notification list.</summary>
private async Task RefreshAll() => await FetchPage();
|
||||
|
||||
/// <summary>Runs the query with the current filters, starting again from the first page.</summary>
private Task Search()
{
    // A new query invalidates the old paging position.
    _pageNumber = 1;
    return FetchPage();
}
|
||||
|
||||
/// <summary>
/// Moves to the previous page and reloads. Does nothing while a load is running
/// or when already on the first page.
/// </summary>
private async Task PrevPage()
{
    // Guard in code as well as in the markup: a click queued before the button
    // re-renders as disabled must not push the page number below 1.
    if (_loading || _pageNumber <= 1) return;

    _pageNumber--;
    await FetchPage();
}
|
||||
/// <summary>
/// Moves to the next page and reloads. Does nothing while a load is running
/// or when the current page already reaches the end of the result set.
/// </summary>
private async Task NextPage()
{
    // Compare against the total rather than the current row count: a full final
    // page (total is an exact multiple of the page size) has no successor.
    // Widened to long so the multiplication cannot overflow.
    if (_loading || (long)_pageNumber * _pageSize >= _totalCount) return;

    _pageNumber++;
    await FetchPage();
}
|
||||
|
||||
/// <summary>
/// Queries the outbox for page <c>_pageNumber</c> using the current filter fields and
/// stores the result in <c>_notifications</c> and <c>_totalCount</c>.
/// A failed query or an exception is reported through <c>_listError</c>; the list
/// loaded earlier is left in place in that case.
/// </summary>
/// <remarks>
/// If the requested page lies past the end of the result set (for example after the
/// last row of the last page was discarded), the page number is clamped to the last
/// available page and the query is issued once more.
/// </remarks>
private async Task FetchPage()
{
    _loading = true;
    _listError = null;
    try
    {
        // At most two round trips: the second only happens after clamping the page number.
        for (var attempt = 0; attempt < 2; attempt++)
        {
            var request = new NotificationOutboxQueryRequest(
                CorrelationId: Guid.NewGuid().ToString("N"),
                StatusFilter: NullIfEmpty(_statusFilter),
                TypeFilter: NullIfEmpty(_typeFilter),
                SourceSiteFilter: NullIfEmpty(_siteFilter),
                ListNameFilter: NullIfEmpty(_listFilter),
                StuckOnly: _stuckOnly,
                SubjectKeyword: NullIfEmpty(_subjectFilter),
                From: ToUtc(_fromFilter),
                To: ToUtc(_toFilter),
                PageNumber: _pageNumber,
                PageSize: _pageSize);

            var response = await CommunicationService.QueryNotificationOutboxAsync(request);
            if (!response.Success)
            {
                _listError = response.ErrorMessage ?? "Query failed.";
                break;
            }

            _notifications = response.Notifications.ToList();
            _totalCount = response.TotalCount;

            // Ceiling division; an empty result set still counts as one (empty) page.
            var lastPage = Math.Max(1, (_totalCount + _pageSize - 1) / _pageSize);
            if (_notifications.Count > 0 || _pageNumber <= lastPage)
            {
                break;
            }

            _pageNumber = lastPage;
        }
    }
    catch (Exception ex)
    {
        _listError = $"Query failed: {ex.Message}";
    }
    finally
    {
        // Always release the busy flag so the Query and paging buttons cannot stay disabled.
        _loading = false;
    }
}
|
||||
|
||||
/// <summary>
/// Asks the user to confirm, then requests that the given notification be re-queued
/// for delivery. Shows a success or error toast and reloads the list on success.
/// </summary>
/// <param name="n">The row whose Retry button was clicked.</param>
private async Task RetryNotification(NotificationSummary n)
{
    var confirmed = await Dialog.ConfirmAsync(
        "Retry notification",
        $"Re-queue notification {ShortId(n.NotificationId)} (\"{n.Subject}\") for delivery?");
    if (!confirmed) return;

    _actionInProgress = true;
    try
    {
        var response = await CommunicationService.RetryNotificationAsync(
            new RetryNotificationRequest(Guid.NewGuid().ToString("N"), n.NotificationId));
        if (response.Success)
        {
            _toast.ShowSuccess($"Notification {ShortId(n.NotificationId)} re-queued for delivery.");
            await RefreshAll();
        }
        else
        {
            _toast.ShowError(response.ErrorMessage ?? "Retry failed.");
        }
    }
    catch (Exception ex)
    {
        _toast.ShowError($"Retry failed: {ex.Message}");
    }
    finally
    {
        // Release the flag on every exit path; it disables every row's action buttons,
        // so leaving it set would lock the whole table.
        _actionInProgress = false;
    }
}
|
||||
|
||||
/// <summary>
/// Asks the user to confirm (danger-styled dialog), then requests that the given
/// notification be discarded. Shows a success or error toast and reloads the list on success.
/// </summary>
/// <param name="n">The row whose Discard button was clicked.</param>
private async Task DiscardNotification(NotificationSummary n)
{
    var confirmed = await Dialog.ConfirmAsync(
        "Discard notification",
        $"Permanently discard notification {ShortId(n.NotificationId)} (\"{n.Subject}\")? This cannot be undone.",
        danger: true);
    if (!confirmed) return;

    _actionInProgress = true;
    try
    {
        var response = await CommunicationService.DiscardNotificationAsync(
            new DiscardNotificationRequest(Guid.NewGuid().ToString("N"), n.NotificationId));
        if (response.Success)
        {
            _toast.ShowSuccess($"Notification {ShortId(n.NotificationId)} discarded.");
            await RefreshAll();
        }
        else
        {
            _toast.ShowError(response.ErrorMessage ?? "Discard failed.");
        }
    }
    catch (Exception ex)
    {
        _toast.ShowError($"Discard failed: {ex.Message}");
    }
    finally
    {
        // Release the flag on every exit path; it disables every row's action buttons,
        // so leaving it set would lock the whole table.
        _actionInProgress = false;
    }
}
|
||||
|
||||
/// <summary>
/// Resets every filter field to its "no constraint" value. Does not re-run the query.
/// </summary>
private void ClearFilters()
{
    // Text filters: empty string means "not set".
    _statusFilter = _typeFilter = _siteFilter = _listFilter = _subjectFilter = string.Empty;

    // Date range and the stuck-only toggle.
    _fromFilter = _toFilter = null;
    _stuckOnly = false;
}
|
||||
|
||||
/// <summary>
/// <c>true</c> when at least one filter field differs from its cleared value;
/// drives the enabled state of the Clear button.
/// </summary>
private bool HasActiveFilters
{
    get
    {
        var noTextFilter =
            string.IsNullOrEmpty(_statusFilter) &&
            string.IsNullOrEmpty(_typeFilter) &&
            string.IsNullOrEmpty(_siteFilter) &&
            string.IsNullOrEmpty(_listFilter) &&
            string.IsNullOrEmpty(_subjectFilter);

        var noOtherFilter = !_stuckOnly && _fromFilter is null && _toFilter is null;

        return !(noTextFilter && noOtherFilter);
    }
}
|
||||
|
||||
/// <summary>
/// Resolves a site identifier to the name of the matching loaded site.
/// Falls back to the identifier itself when no site matches or the match has no name.
/// </summary>
/// <param name="siteId">Site identifier as carried on the notification row.</param>
private string SiteName(string siteId)
{
    var match = _sites.FirstOrDefault(site => site.SiteIdentifier == siteId);
    return match?.Name ?? siteId;
}
|
||||
|
||||
/// <summary>
/// Normalises a filter text box value: <c>null</c> for null, empty or
/// whitespace-only input, otherwise the input with surrounding whitespace trimmed.
/// </summary>
private static string? NullIfEmpty(string s)
{
    if (string.IsNullOrWhiteSpace(s))
    {
        return null;
    }

    return s.Trim();
}
|
||||
|
||||
/// <summary>
/// Wraps a date filter value in a <see cref="DateTimeOffset"/> with a zero (UTC) offset,
/// or returns <c>null</c> when no value is set.
/// </summary>
/// <remarks>
/// NOTE(review): the clock value is re-labelled as UTC, not converted from local time,
/// even though the parameter is named <paramref name="local"/> — confirm the date inputs
/// are meant to be entered in UTC.
/// </remarks>
private static DateTimeOffset? ToUtc(DateTime? local)
{
    if (local is not { } value)
    {
        return null;
    }

    var stampedAsUtc = DateTime.SpecifyKind(value, DateTimeKind.Utc);
    return new DateTimeOffset(stampedAsUtc);
}
|
||||
|
||||
/// <summary>Returns at most the first 12 characters of <paramref name="id"/> for compact display.</summary>
private static string ShortId(string id)
{
    const int maxLength = 12;
    return id.Length <= maxLength ? id : id.Substring(0, maxLength);
}
|
||||
|
||||
/// <summary>
/// Maps a notification status name to the Bootstrap badge classes used to render it.
/// Unrecognised names get the neutral light badge.
/// </summary>
private static string StatusBadgeClass(string status)
{
    switch (status)
    {
        case "Delivered":
            return "bg-success";
        case "Parked":
            return "bg-danger";
        case "Retrying":
            return "bg-warning text-dark";
        case "Pending":
            return "bg-info text-dark";
        case "Discarded":
            return "bg-secondary";
        default:
            return "bg-light text-dark";
    }
}
|
||||
}
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
@page "/admin/smtp"
|
||||
@page "/notifications/smtp"
|
||||
@using ScadaLink.Security
|
||||
@using ScadaLink.Commons.Interfaces.Repositories
|
||||
@using SmtpConfigurationEntity = ScadaLink.Commons.Entities.Notifications.SmtpConfiguration
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Data.Common;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
|
||||
namespace ScadaLink.CentralUI.ScriptAnalysis;
|
||||
|
||||
@@ -80,39 +81,50 @@ public class SandboxDatabaseHelper
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sandbox mirror of <c>ScadaLink.SiteRuntime.Scripts.NotifyHelper</c> — the
|
||||
/// <c>Notify</c> global. Signature-faithful to production so the same user code
|
||||
/// (<c>Notify.To(...).Send(...)</c> / <c>Notify.Status(...)</c>) compiles
|
||||
/// identically against both surfaces.
|
||||
///
|
||||
/// In the Notification Outbox design production no longer delivers notification
|
||||
/// email inline — <c>Notify.Send</c> enqueues into the site Store-and-Forward
|
||||
/// Engine and returns a <c>NotificationId</c>. The sandbox has no S&F engine
|
||||
/// and no central, so it is a pure no-op fake: <c>Send</c> returns a generated
|
||||
/// fake id and <c>Status</c> returns a placeholder <see cref="NotificationDeliveryStatus"/>.
|
||||
/// Nothing is delivered.
|
||||
/// </summary>
|
||||
public class SandboxNotifyHelper
|
||||
{
|
||||
private readonly INotificationDeliveryService? _service;
|
||||
private readonly string _instanceName;
|
||||
|
||||
public SandboxNotifyHelper(INotificationDeliveryService? service, string instanceName)
|
||||
{
|
||||
_service = service;
|
||||
_instanceName = instanceName;
|
||||
}
|
||||
|
||||
/// <summary>Selects the notification list to send to.</summary>
|
||||
public SandboxNotifyTarget To(string listName) =>
|
||||
new(listName, _service, _instanceName);
|
||||
new();
|
||||
|
||||
/// <summary>
|
||||
/// Queries the delivery status of a previously-sent notification. The
|
||||
/// sandbox never delivers, so this always reports the placeholder
|
||||
/// <c>Unknown</c> status — it exists for signature fidelity with
|
||||
/// <c>NotifyHelper.Status</c>.
|
||||
/// </summary>
|
||||
public Task<NotificationDeliveryStatus> Status(string notificationId) =>
|
||||
Task.FromResult(new NotificationDeliveryStatus("Unknown", 0, null, null));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sandbox mirror of <c>ScadaLink.SiteRuntime.Scripts.NotifyTarget</c> — the
|
||||
/// target of <c>Notify.To("listName")</c>.
|
||||
/// </summary>
|
||||
public class SandboxNotifyTarget
|
||||
{
|
||||
private readonly string _listName;
|
||||
private readonly INotificationDeliveryService? _service;
|
||||
private readonly string _instanceName;
|
||||
|
||||
internal SandboxNotifyTarget(string listName, INotificationDeliveryService? service, string instanceName)
|
||||
internal SandboxNotifyTarget()
|
||||
{
|
||||
_listName = listName;
|
||||
_service = service;
|
||||
_instanceName = instanceName;
|
||||
}
|
||||
|
||||
public Task<NotificationResult> Send(string subject, string message, CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (_service == null)
|
||||
throw new ScriptSandboxException(
|
||||
$"Notify.To(\"{_listName}\").Send(...) — notification service not configured for Test Run.");
|
||||
return _service.SendAsync(_listName, subject, message, _instanceName, cancellationToken);
|
||||
}
|
||||
/// <summary>
|
||||
/// Mirrors <c>NotifyTarget.Send</c> — returns a <c>NotificationId</c>. In
|
||||
/// the sandbox nothing is enqueued or delivered; a fake id is returned so
|
||||
/// the call type-checks identically to production.
|
||||
/// </summary>
|
||||
public Task<string> Send(string subject, string message, CancellationToken cancellationToken = default) =>
|
||||
Task.FromResult(Guid.NewGuid().ToString("N"));
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ namespace ScadaLink.CentralUI.ScriptAnalysis;
|
||||
/// instance. With no instance bound they throw <see cref="ScriptSandboxException"/>;
|
||||
/// with one bound (see <see cref="SandboxInstanceContext"/>) they route to it.
|
||||
///
|
||||
/// <c>ExternalSystem</c>, <c>Database</c>, <c>Notify</c>, and
|
||||
/// <c>Scripts.CallShared</c> run against central's real services and fire for
|
||||
/// real — they do not depend on a bound instance.
|
||||
/// <c>ExternalSystem</c>, <c>Database</c>, and <c>Scripts.CallShared</c> run
|
||||
/// against central's real services and fire for real; <c>Notify</c> is a
|
||||
/// signature-faithful no-op fake. None of them depend on a bound instance.
|
||||
/// </summary>
|
||||
public class SandboxScriptHost
|
||||
{
|
||||
@@ -58,8 +58,8 @@ public interface ISandboxInstanceGateway
|
||||
/// the <c>Instance</c> global. Attribute and sibling-script access needs a real
|
||||
/// deployed instance: with no gateway wired it throws; with one (a bound
|
||||
/// instance) it routes cross-site. <c>ExternalSystem</c>/<c>Database</c>/
|
||||
/// <c>Notify</c>/<c>Scripts</c> run against central's real services regardless
|
||||
/// of binding.
|
||||
/// <c>Scripts</c> run against central's real services regardless of binding;
|
||||
/// <c>Notify</c> is a signature-faithful no-op fake.
|
||||
/// </summary>
|
||||
public class SandboxInstanceContext
|
||||
{
|
||||
@@ -80,7 +80,7 @@ public class SandboxInstanceContext
|
||||
_gateway = gateway;
|
||||
ExternalSystem = external ?? new SandboxExternalHelper(null, "<sandbox>");
|
||||
Database = database ?? new SandboxDatabaseHelper(null, "<sandbox>");
|
||||
Notify = notify ?? new SandboxNotifyHelper(null, "<sandbox>");
|
||||
Notify = notify ?? new SandboxNotifyHelper();
|
||||
Scripts = scripts ?? new SandboxScriptCallHelper(null);
|
||||
}
|
||||
|
||||
|
||||
@@ -154,11 +154,12 @@ public class ScriptAnalysisService
|
||||
/// scripts against <see cref="SandboxInboundScriptHost"/>.
|
||||
/// Pure logic + the supplied Parameters always work.
|
||||
/// For the SandboxScriptHost surface, <c>Attributes</c> still throws while
|
||||
/// <c>External</c>, <c>Database</c>, and <c>Notify</c> are wired to
|
||||
/// central's real <see cref="IExternalSystemClient"/>,
|
||||
/// <see cref="IDatabaseGateway"/>, and
|
||||
/// <see cref="INotificationDeliveryService"/> — calls fire for real and
|
||||
/// have production-equivalent side effects (HTTP, SQL, SMTP).
|
||||
/// <c>External</c> and <c>Database</c> are wired to central's real
|
||||
/// <see cref="IExternalSystemClient"/> and <see cref="IDatabaseGateway"/> —
|
||||
/// calls fire for real and have production-equivalent side effects (HTTP,
|
||||
/// SQL). <c>Notify</c> is a signature-faithful no-op fake (production
|
||||
/// enqueues into the site Store-and-Forward Engine, which has no
|
||||
/// central-side equivalent in the sandbox).
|
||||
/// <c>CallShared</c> compiles and executes the named shared script in the
|
||||
/// same sandbox, with a recursion limit of
|
||||
/// <see cref="SandboxMaxCallSharedDepth"/>. <c>CallScript</c> still throws
|
||||
@@ -269,10 +270,13 @@ public class ScriptAnalysisService
|
||||
|
||||
var externalClient = _services.GetService<IExternalSystemClient>();
|
||||
var databaseGateway = _services.GetService<IDatabaseGateway>();
|
||||
var notifyService = _services.GetService<INotificationDeliveryService>();
|
||||
var external = new SandboxExternalHelper(externalClient, instanceLabel);
|
||||
var database = new SandboxDatabaseHelper(databaseGateway, instanceLabel);
|
||||
var notify = new SandboxNotifyHelper(notifyService, instanceLabel);
|
||||
// The Notification Outbox sandbox Notify is a pure no-op fake — it
|
||||
// mirrors production signatures so scripts compile identically, but it
|
||||
// does not deliver (production now enqueues into the site S&F engine,
|
||||
// which has no central-side equivalent here).
|
||||
var notify = new SandboxNotifyHelper();
|
||||
|
||||
var compileCache = new Dictionary<string, Script<object>>(StringComparer.Ordinal);
|
||||
var compileCacheLock = new object();
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Entities.Notifications;
|
||||
|
||||
/// <summary>
|
||||
/// A single notification queued in the central outbox. Created at a site (where the
|
||||
/// <see cref="NotificationId"/> GUID is generated) and forwarded to the central cluster
|
||||
/// for delivery, retry, and audit. The lifecycle is tracked by <see cref="Status"/>.
|
||||
/// </summary>
|
||||
public class Notification
{
    /// <summary>GUID primary key, generated at the originating site.</summary>
    public string NotificationId { get; set; }

    /// <summary>Delivery channel of the notification.</summary>
    public NotificationType Type { get; set; }

    public string ListName { get; set; }

    public string Subject { get; set; }

    public string Body { get; set; }

    /// <summary>JSON extensibility hook for channel-specific payload data.</summary>
    public string? TypeData { get; set; }

    /// <summary>Lifecycle status; a new instance starts as <see cref="NotificationStatus.Pending"/>.</summary>
    public NotificationStatus Status { get; set; } = NotificationStatus.Pending;

    public int RetryCount { get; set; }

    public string? LastError { get; set; }

    /// <summary>Resolved delivery targets snapshotted at delivery time, for audit.</summary>
    public string? ResolvedTargets { get; set; }

    public string SourceSiteId { get; set; }

    public string? SourceInstanceId { get; set; }

    public string? SourceScript { get; set; }

    public DateTimeOffset SiteEnqueuedAt { get; set; }

    /// <summary>Central ingest time.</summary>
    public DateTimeOffset CreatedAt { get; set; }

    public DateTimeOffset? LastAttemptAt { get; set; }

    public DateTimeOffset? NextAttemptAt { get; set; }

    public DateTimeOffset? DeliveredAt { get; set; }

    /// <summary>
    /// Creates a notification from its required values. All other members keep their
    /// defaults and are set through their properties.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// Any of the string arguments is <c>null</c>.
    /// </exception>
    public Notification(string notificationId, NotificationType type, string listName,
        string subject, string body, string sourceSiteId)
    {
        // Validate in parameter order so the first null argument is the one reported.
        if (notificationId is null) throw new ArgumentNullException(nameof(notificationId));
        if (listName is null) throw new ArgumentNullException(nameof(listName));
        if (subject is null) throw new ArgumentNullException(nameof(subject));
        if (body is null) throw new ArgumentNullException(nameof(body));
        if (sourceSiteId is null) throw new ArgumentNullException(nameof(sourceSiteId));

        NotificationId = notificationId;
        Type = type;
        ListName = listName;
        Subject = subject;
        Body = body;
        SourceSiteId = sourceSiteId;
    }
}
|
||||
@@ -1,9 +1,12 @@
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Entities.Notifications;
|
||||
|
||||
public class NotificationList
|
||||
{
|
||||
public int Id { get; set; }
|
||||
public string Name { get; set; }
|
||||
public NotificationType Type { get; set; } = NotificationType.Email;
|
||||
public ICollection<NotificationRecipient> Recipients { get; set; } = new List<NotificationRecipient>();
|
||||
|
||||
public NotificationList(string name)
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.Commons.Interfaces.Repositories;
|
||||
|
||||
/// <summary>
|
||||
/// Data access for the central notification outbox — the queue of <see cref="Notification"/>
|
||||
/// rows the outbox actor drains, retries, and audits. Distinct from
|
||||
/// <see cref="INotificationRepository"/>, which manages notification list configuration.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Persistence model: <see cref="InsertIfNotExistsAsync"/> and <see cref="UpdateAsync"/> commit
|
||||
/// internally, so each call is its own transaction — suited to the outbox actor committing one
|
||||
/// row's status transition at a time. The standalone <see cref="SaveChangesAsync"/> is available
|
||||
/// for callers that stage multiple changes and want to flush them together.
|
||||
/// </remarks>
|
||||
public interface INotificationOutboxRepository
|
||||
{
|
||||
/// <summary>
|
||||
/// Inserts <paramref name="n"/> only if no row with the same
|
||||
/// <see cref="Notification.NotificationId"/> exists. Returns <c>true</c> when a new
|
||||
/// row was inserted, <c>false</c> when an existing row was left untouched.
|
||||
/// Commits internally — this call is its own transaction.
|
||||
/// </summary>
|
||||
Task<bool> InsertIfNotExistsAsync(Notification n, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Returns notifications ready for a delivery attempt: <c>Pending</c> rows, plus
|
||||
/// <c>Retrying</c> rows whose <c>NextAttemptAt</c> is at or before <paramref name="now"/>.
|
||||
/// Terminal rows are excluded. Ordered by <c>CreatedAt</c> ascending, capped at
|
||||
/// <paramref name="batchSize"/>.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<Notification>> GetDueAsync(DateTimeOffset now, int batchSize, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>Returns the notification with the given id, or <c>null</c>.</summary>
|
||||
Task<Notification?> GetByIdAsync(string notificationId, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Marks <paramref name="n"/> modified and persists it (status transitions).
|
||||
/// Commits internally — this call is its own transaction.
|
||||
/// </summary>
|
||||
Task UpdateAsync(Notification n, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Returns a page of notifications matching <paramref name="filter"/>, ordered by
|
||||
/// <c>CreatedAt</c> descending, together with the total matching count.
|
||||
/// </summary>
|
||||
Task<(IReadOnlyList<Notification> Rows, int TotalCount)> QueryAsync(
|
||||
NotificationOutboxFilter filter, int pageNumber, int pageSize, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Bulk-deletes terminal rows (Delivered/Parked/Discarded) whose <c>CreatedAt</c> is
|
||||
/// older than <paramref name="cutoff"/>. Returns the number of rows deleted.
|
||||
/// </summary>
|
||||
Task<int> DeleteTerminalOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Computes a point-in-time <see cref="NotificationKpiSnapshot"/>. The stuck and
|
||||
/// delivered cutoffs are supplied by the caller; the current time used for
|
||||
/// <c>OldestPendingAge</c> is captured inside the method.
|
||||
/// </summary>
|
||||
Task<NotificationKpiSnapshot> ComputeKpisAsync(
|
||||
DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Computes a point-in-time <see cref="SiteNotificationKpiSnapshot"/> per source site.
|
||||
/// Sites with no notification rows at all are omitted. The stuck and delivered cutoffs
|
||||
/// are supplied by the caller; the current time used for <c>OldestPendingAge</c> is
|
||||
/// captured inside the method.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<SiteNotificationKpiSnapshot>> ComputePerSiteKpisAsync(
|
||||
DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Persists pending changes tracked on the underlying context. Use this when staging
|
||||
/// multiple changes for a single commit; the individual mutating methods on this
|
||||
/// interface already commit on their own.
|
||||
/// </summary>
|
||||
Task<int> SaveChangesAsync(CancellationToken cancellationToken = default);
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
namespace ScadaLink.Commons.Messages.Notification;
|
||||
|
||||
/// <summary>
|
||||
/// Site -> Central: submit a notification for central delivery.
|
||||
/// Fire-and-forget with ack; the site retries until a <see cref="NotificationSubmitAck"/> is received.
|
||||
/// </summary>
|
||||
public record NotificationSubmit(
|
||||
string NotificationId,
|
||||
string ListName,
|
||||
string Subject,
|
||||
string Body,
|
||||
string SourceSiteId,
|
||||
string? SourceInstanceId,
|
||||
string? SourceScript,
|
||||
DateTimeOffset SiteEnqueuedAt);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Site: ack sent after the notification row is persisted.
|
||||
/// Idempotent — safe to re-send for the same <see cref="NotificationId"/>.
|
||||
/// </summary>
|
||||
public record NotificationSubmitAck(
|
||||
string NotificationId,
|
||||
bool Accepted,
|
||||
string? Error);
|
||||
|
||||
/// <summary>
|
||||
/// Site -> Central: query the central delivery status for a <see cref="NotificationId"/>.
|
||||
/// </summary>
|
||||
public record NotificationStatusQuery(
|
||||
string CorrelationId,
|
||||
string NotificationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Site: response carrying the current delivery status for a queried notification.
|
||||
/// </summary>
|
||||
public record NotificationStatusResponse(
|
||||
string CorrelationId,
|
||||
bool Found,
|
||||
string Status,
|
||||
int RetryCount,
|
||||
string? LastError,
|
||||
DateTimeOffset? DeliveredAt);
|
||||
|
||||
/// <summary>
|
||||
/// Notification Outbox: the delivery status of a notification, as returned to a
|
||||
/// script by <c>Notify.Status(id)</c>.
|
||||
///
|
||||
/// <see cref="Status"/> is either a central status (<c>Pending</c>, <c>Retrying</c>,
|
||||
/// <c>Delivered</c>, <c>Parked</c>, <c>Discarded</c>), the site-local <c>Forwarding</c>
|
||||
/// state (the notification is still buffered at the site and has not yet been
|
||||
/// forwarded/acked), or <c>Unknown</c> (no central row and not buffered locally).
|
||||
/// </summary>
|
||||
public record NotificationDeliveryStatus(
|
||||
string Status,
|
||||
int RetryCount,
|
||||
string? LastError,
|
||||
DateTimeOffset? DeliveredAt);
|
||||
@@ -0,0 +1,115 @@
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.Commons.Messages.Notification;
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: paginated, filtered query over the notification outbox.
|
||||
/// All filter fields are optional; <see cref="StuckOnly"/> restricts results to stuck notifications.
|
||||
/// </summary>
|
||||
public record NotificationOutboxQueryRequest(
|
||||
string CorrelationId,
|
||||
string? StatusFilter,
|
||||
string? TypeFilter,
|
||||
string? SourceSiteFilter,
|
||||
string? ListNameFilter,
|
||||
bool StuckOnly,
|
||||
string? SubjectKeyword,
|
||||
DateTimeOffset? From,
|
||||
DateTimeOffset? To,
|
||||
int PageNumber,
|
||||
int PageSize);
|
||||
|
||||
/// <summary>
|
||||
/// A single notification row summarised for outbox UI display.
|
||||
/// </summary>
|
||||
public record NotificationSummary(
|
||||
string NotificationId,
|
||||
string Type,
|
||||
string ListName,
|
||||
string Subject,
|
||||
string Status,
|
||||
int RetryCount,
|
||||
string? LastError,
|
||||
string SourceSiteId,
|
||||
string? SourceInstanceId,
|
||||
DateTimeOffset CreatedAt,
|
||||
DateTimeOffset? DeliveredAt,
|
||||
bool IsStuck);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: paginated response for a <see cref="NotificationOutboxQueryRequest"/>.
|
||||
/// </summary>
|
||||
public record NotificationOutboxQueryResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<NotificationSummary> Notifications,
|
||||
int TotalCount);
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: request to immediately retry delivery of a notification.
|
||||
/// </summary>
|
||||
public record RetryNotificationRequest(
|
||||
string CorrelationId,
|
||||
string NotificationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: result of a <see cref="RetryNotificationRequest"/>.
|
||||
/// </summary>
|
||||
public record RetryNotificationResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage);
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: request to discard (cancel) a pending or stuck notification.
|
||||
/// </summary>
|
||||
public record DiscardNotificationRequest(
|
||||
string CorrelationId,
|
||||
string NotificationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: result of a <see cref="DiscardNotificationRequest"/>.
|
||||
/// </summary>
|
||||
public record DiscardNotificationResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage);
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: request for the notification outbox KPI summary.
|
||||
/// </summary>
|
||||
public record NotificationKpiRequest(
|
||||
string CorrelationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: KPI summary for the notification outbox dashboard.
|
||||
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
|
||||
/// carries the cause, and the KPI fields are zeroed/<c>null</c>.
|
||||
/// </summary>
|
||||
public record NotificationKpiResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
int QueueDepth,
|
||||
int StuckCount,
|
||||
int ParkedCount,
|
||||
int DeliveredLastInterval,
|
||||
TimeSpan? OldestPendingAge);
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: request for the per-source-site notification outbox KPI breakdown.
|
||||
/// </summary>
|
||||
public record PerSiteNotificationKpiRequest(
|
||||
string CorrelationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: per-site KPI breakdown for the Notification KPIs page.
|
||||
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
|
||||
/// carries the cause, and <see cref="Sites"/> is empty.
|
||||
/// </summary>
|
||||
public record PerSiteNotificationKpiResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<SiteNotificationKpiSnapshot> Sites);
|
||||
@@ -0,0 +1,14 @@
|
||||
namespace ScadaLink.Commons.Types.Enums;
|
||||
|
||||
/// <summary>
|
||||
/// Lifecycle status of a notification in the central outbox. The site-local
|
||||
/// <c>Forwarding</c> concept is intentionally not part of the central status set.
|
||||
/// </summary>
|
||||
public enum NotificationStatus
|
||||
{
|
||||
/// <summary>Non-terminal. Counts towards queue depth and is always due for a delivery attempt.</summary>
Pending,
|
||||
/// <summary>Non-terminal. Counts towards queue depth; due again once <c>NextAttemptAt</c> has been reached.</summary>
Retrying,
|
||||
/// <summary>Terminal. Eligible for the age-based purge of terminal rows.</summary>
Delivered,
|
||||
/// <summary>Terminal. The only status for which the outbox UI offers Retry and Discard.</summary>
Parked,
|
||||
/// <summary>Terminal. Eligible for the age-based purge of terminal rows.</summary>
Discarded
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
namespace ScadaLink.Commons.Types.Enums;
|
||||
|
||||
/// <summary>
|
||||
/// Delivery channel for a notification. Currently only email is supported.
|
||||
/// </summary>
|
||||
public enum NotificationType
|
||||
{
|
||||
Email
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
namespace ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
/// <summary>
/// Snapshot of the central notification outbox's operational metrics at a single
/// moment, shown on the health dashboard.
/// </summary>
/// <param name="QueueDepth">Number of rows still in flight (Pending + Retrying).</param>
/// <param name="StuckCount">
/// Number of in-flight rows (Pending/Retrying) with a <c>CreatedAt</c> earlier than
/// the stuck cutoff supplied by the caller.
/// </param>
/// <param name="ParkedCount">Number of rows whose status is Parked.</param>
/// <param name="DeliveredLastInterval">
/// Number of Delivered rows with a <c>DeliveredAt</c> at or after the supplied
/// "delivered since" timestamp.
/// </param>
/// <param name="OldestPendingAge">
/// How long the oldest in-flight row has existed (<c>now - min(CreatedAt)</c>);
/// <c>null</c> if no row is in flight.
/// </param>
public record NotificationKpiSnapshot(
    int QueueDepth,
    int StuckCount,
    int ParkedCount,
    int DeliveredLastInterval,
    TimeSpan? OldestPendingAge);
|
||||
@@ -0,0 +1,30 @@
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
/// <summary>
/// Filter criteria for querying the central notification outbox. Every member is
/// optional; leaving one unset places no constraint on that dimension.
/// </summary>
/// <param name="Status">Only rows with this lifecycle status.</param>
/// <param name="Type">Only rows with this delivery channel.</param>
/// <param name="SourceSiteId">Only notifications that originated at this site.</param>
/// <param name="ListName">Only notifications addressed to this notification list.</param>
/// <param name="SubjectKeyword">Substring to look for in <c>Subject</c>.</param>
/// <param name="StuckOnly">
/// If <c>true</c>, only non-terminal rows (Pending/Retrying) whose <c>CreatedAt</c>
/// is older than <see cref="StuckCutoff"/>.
/// </param>
/// <param name="StuckCutoff">A row counts as stuck when its <c>CreatedAt</c> is older than this.</param>
/// <param name="From">Inclusive lower bound on <c>CreatedAt</c>.</param>
/// <param name="To">Inclusive upper bound on <c>CreatedAt</c>.</param>
public record NotificationOutboxFilter(
    NotificationStatus? Status = null,
    NotificationType? Type = null,
    string? SourceSiteId = null,
    string? ListName = null,
    string? SubjectKeyword = null,
    bool StuckOnly = false,
    DateTimeOffset? StuckCutoff = null,
    DateTimeOffset? From = null,
    DateTimeOffset? To = null);
|
||||
@@ -0,0 +1,27 @@
|
||||
namespace ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
/// <summary>
/// Notification-outbox metrics for one source site at a single moment. This is the
/// per-site counterpart of <see cref="NotificationKpiSnapshot"/> and feeds the
/// per-site breakdown table on the Notification KPIs page.
/// </summary>
/// <param name="SourceSiteId">Identifier of the site the metrics belong to.</param>
/// <param name="QueueDepth">Number of the site's in-flight rows (Pending + Retrying).</param>
/// <param name="StuckCount">
/// Number of the site's in-flight rows with a <c>CreatedAt</c> older than the stuck cutoff.
/// </param>
/// <param name="ParkedCount">Number of the site's rows whose status is Parked.</param>
/// <param name="DeliveredLastInterval">
/// Number of the site's Delivered rows with a <c>DeliveredAt</c> at or after the
/// "delivered since" timestamp.
/// </param>
/// <param name="OldestPendingAge">
/// Age of the site's oldest in-flight row; <c>null</c> if the site has none.
/// </param>
public record SiteNotificationKpiSnapshot(
    string SourceSiteId,
    int QueueDepth,
    int StuckCount,
    int ParkedCount,
    int DeliveredLastInterval,
    TimeSpan? OldestPendingAge);
|
||||
@@ -7,6 +7,7 @@ using Microsoft.Extensions.DependencyInjection;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Communication;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
namespace ScadaLink.Communication.Actors;
|
||||
@@ -66,6 +67,15 @@ public class CentralCommunicationActor : ReceiveActor
|
||||
|
||||
private ICancelable? _refreshSchedule;
|
||||
|
||||
/// <summary>
|
||||
/// Proxy <see cref="IActorRef"/> for the central NotificationOutboxActor cluster singleton.
|
||||
/// Set via <see cref="RegisterNotificationOutbox"/> — the Host creates the singleton proxy
|
||||
/// after this actor and registers it (mirrors how the site-side actor receives its
|
||||
/// runtime <see cref="IActorRef"/>s). Null until registration completes; a notification
|
||||
/// arriving before then is rejected with a non-accepted ack so the site retries.
|
||||
/// </summary>
|
||||
private IActorRef? _notificationOutboxProxy;
|
||||
|
||||
/// <summary>
|
||||
/// DistributedPubSub topic used to fan health reports out to the peer
|
||||
/// central node so both per-node aggregators stay in sync. See
|
||||
@@ -105,6 +115,61 @@ public class CentralCommunicationActor : ReceiveActor
|
||||
// Route enveloped messages to sites
|
||||
Receive<SiteEnvelope>(HandleSiteEnvelope);
|
||||
|
||||
// Notification Outbox: the Host registers the outbox singleton proxy after this
|
||||
// actor is created (the proxy cannot exist before this actor's construction).
|
||||
Receive<RegisterNotificationOutbox>(msg =>
|
||||
{
|
||||
_notificationOutboxProxy = msg.OutboxProxy;
|
||||
_log.Info("Registered notification outbox proxy");
|
||||
});
|
||||
|
||||
// Notification Outbox ingest: a site forwards a buffered NotificationSubmit to the
|
||||
// central cluster via ClusterClient. Forward to the outbox proxy so the original
|
||||
// Sender (the site's ClusterClient path) is preserved and the NotificationSubmitAck
|
||||
// routes straight back to the site.
|
||||
Receive<NotificationSubmit>(HandleNotificationSubmit);
|
||||
|
||||
// Notification Outbox status query: forward to the outbox proxy, preserving Sender
|
||||
// so the NotificationStatusResponse routes back to the querying site.
|
||||
Receive<NotificationStatusQuery>(HandleNotificationStatusQuery);
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
/// Routes a <see cref="NotificationSubmit"/> to the registered notification outbox proxy.
/// The message is forwarded rather than told, so the original sender remains the reply
/// target. When no proxy is registered, the sender receives a non-accepted
/// <see cref="NotificationSubmitAck"/> instead.
/// </summary>
/// <param name="msg">The notification submission to route.</param>
private void HandleNotificationSubmit(NotificationSubmit msg)
{
    var notificationId = msg.NotificationId;

    if (_notificationOutboxProxy is { } outbox)
    {
        _log.Debug("Routing NotificationSubmit {0} to the notification outbox", notificationId);
        outbox.Forward(msg);
        return;
    }

    // The outbox proxy has not been registered yet. Answering with a non-accepted
    // ack lets the site's Store-and-Forward forwarder treat the failure as
    // transient and try again later.
    _log.Warning(
        "Cannot route NotificationSubmit {0} — notification outbox not available",
        notificationId);

    var rejection = new NotificationSubmitAck(
        notificationId, Accepted: false, Error: "notification outbox not available");
    Sender.Tell(rejection);
}
|
||||
|
||||
/// <summary>
/// Routes a <see cref="NotificationStatusQuery"/> to the registered notification outbox
/// proxy, forwarding it so the original sender remains the reply target. When no proxy
/// is registered, the sender receives a <see cref="NotificationStatusResponse"/> with
/// <c>Found: false</c>.
/// </summary>
/// <param name="msg">The status query to route.</param>
private void HandleNotificationStatusQuery(NotificationStatusQuery msg)
{
    if (_notificationOutboxProxy is { } outbox)
    {
        _log.Debug("Routing NotificationStatusQuery {0} to the notification outbox", msg.NotificationId);
        outbox.Forward(msg);
        return;
    }

    // The outbox proxy has not been registered yet. A Found: false reply lets the
    // querying site fall back to its local Store-and-Forward buffer to resolve
    // the status.
    _log.Warning(
        "Cannot route NotificationStatusQuery {0} — notification outbox not available",
        msg.NotificationId);

    var notFound = new NotificationStatusResponse(
        msg.CorrelationId,
        Found: false,
        Status: "Unknown",
        RetryCount: 0,
        LastError: null,
        DeliveredAt: null);
    Sender.Tell(notFound);
}
|
||||
|
||||
private void HandleHeartbeat(HeartbeatMessage heartbeat)
|
||||
@@ -391,3 +456,11 @@ internal record SiteAddressCacheLoaded(Dictionary<string, List<string>> SiteCont
|
||||
/// due to site disconnection (WP-5).
|
||||
/// </summary>
|
||||
public record DebugStreamTerminated(string SiteId, string CorrelationId);
|
||||
|
||||
/// <summary>
/// Message that hands the central NotificationOutboxActor singleton proxy to the
/// <see cref="CentralCommunicationActor"/>, enabling it to route site-forwarded
/// <see cref="NotificationSubmit"/> and <see cref="NotificationStatusQuery"/> messages
/// to the outbox. The Host sends it once the outbox singleton proxy has been created.
/// </summary>
/// <param name="OutboxProxy">Reference to the notification outbox singleton proxy.</param>
public record RegisterNotificationOutbox(IActorRef OutboxProxy);
|
||||
|
||||
@@ -8,6 +8,7 @@ using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Messages.InboundApi;
|
||||
using ScadaLink.Commons.Messages.Integration;
|
||||
using ScadaLink.Commons.Messages.Lifecycle;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Messages.RemoteQuery;
|
||||
|
||||
namespace ScadaLink.Communication.Actors;
|
||||
@@ -165,6 +166,54 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
});
|
||||
|
||||
// Notification Outbox: a buffered notification handed over by the site
// Store-and-Forward Engine is sent on to the central cluster. The original
// Sender (the S&F forwarder's Ask) is passed as the sender of the
// ClusterClient.Send, so the NotificationSubmitAck goes straight back to the
// waiting Ask rather than to this actor.
Receive<NotificationSubmit>(msg =>
{
    if (_centralClient is { } central)
    {
        _log.Debug("Forwarding NotificationSubmit {0} to central", msg.NotificationId);

        var toCentral = new ClusterClient.Send("/user/central-communication", msg);
        central.Tell(toCentral, Sender);
        return;
    }

    // No ClusterClient is registered yet (e.g. central contact points are not
    // configured, or registration has not completed). Replying with a
    // non-accepted ack makes the S&F forwarder treat this as transient and
    // retry later.
    _log.Warning(
        "Cannot forward NotificationSubmit {0} — no central ClusterClient registered",
        msg.NotificationId);

    var rejection = new NotificationSubmitAck(
        msg.NotificationId, Accepted: false, Error: "Central ClusterClient not registered");
    Sender.Tell(rejection);
});
|
||||
|
||||
// Notification Outbox: a Notify.Status query is sent on to the central cluster.
// The original Sender (the Notify helper's Ask) is passed as the sender of the
// ClusterClient.Send, so the NotificationStatusResponse goes straight back to
// the waiting Ask rather than to this actor.
Receive<NotificationStatusQuery>(msg =>
{
    if (_centralClient is { } central)
    {
        _log.Debug("Forwarding NotificationStatusQuery {0} to central", msg.NotificationId);

        var toCentral = new ClusterClient.Send("/user/central-communication", msg);
        central.Tell(toCentral, Sender);
        return;
    }

    // No ClusterClient is registered yet. Replying Found: false lets
    // Notify.Status fall back to the site S&F buffer to decide between
    // Forwarding and Unknown.
    _log.Warning(
        "Cannot forward NotificationStatusQuery {0} — no central ClusterClient registered",
        msg.NotificationId);

    var notFound = new NotificationStatusResponse(
        msg.CorrelationId,
        Found: false,
        Status: "Unknown",
        RetryCount: 0,
        LastError: null,
        DeliveredAt: null);
    Sender.Tell(notFound);
});
|
||||
|
||||
// Internal: send heartbeat tick
|
||||
Receive<SendHeartbeat>(_ => SendHeartbeatToCentral());
|
||||
|
||||
|
||||
@@ -27,6 +27,14 @@ public class CommunicationOptions
|
||||
/// <summary>Timeout for health report acknowledgement (fire-and-forget, but bounded).</summary>
|
||||
public TimeSpan HealthReportTimeout { get; set; } = TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <summary>
|
||||
/// Notification Outbox: timeout for forwarding a buffered notification to central
|
||||
/// and awaiting its <c>NotificationSubmitAck</c>. A timeout is treated as a
|
||||
/// transient failure — the Store-and-Forward engine keeps the message buffered
|
||||
/// and retries the forward at the fixed retry interval.
|
||||
/// </summary>
|
||||
public TimeSpan NotificationForwardTimeout { get; set; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
/// <summary>
|
||||
/// Contact point addresses for the central cluster (e.g. "akka.tcp://scadalink@central-a:8081").
|
||||
/// Used by site nodes to create a ClusterClient for reaching central.
|
||||
|
||||
@@ -8,6 +8,7 @@ using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Messages.InboundApi;
|
||||
using ScadaLink.Commons.Messages.Integration;
|
||||
using ScadaLink.Commons.Messages.Lifecycle;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Messages.RemoteQuery;
|
||||
using ScadaLink.Communication.Actors;
|
||||
|
||||
@@ -23,6 +24,7 @@ public class CommunicationService
|
||||
private readonly CommunicationOptions _options;
|
||||
private readonly ILogger<CommunicationService> _logger;
|
||||
private IActorRef? _centralCommunicationActor;
|
||||
private IActorRef? _notificationOutboxProxy;
|
||||
|
||||
public CommunicationService(
|
||||
IOptions<CommunicationOptions> options,
|
||||
@@ -40,6 +42,16 @@ public class CommunicationService
|
||||
_centralCommunicationActor = centralCommunicationActor;
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores the notification-outbox singleton proxy reference; invoked while the actor
/// system starts up. Because the outbox actor is central-local, outbox calls Ask this
/// proxy directly instead of going through SiteEnvelope routing.
/// </summary>
/// <param name="notificationOutboxProxy">The outbox singleton proxy to use for outbox calls.</param>
public void SetNotificationOutbox(IActorRef notificationOutboxProxy)
    => _notificationOutboxProxy = notificationOutboxProxy;
|
||||
|
||||
/// <summary>
|
||||
/// Triggers an immediate refresh of the site address cache from the database.
|
||||
/// </summary>
|
||||
@@ -59,6 +71,15 @@ public class CommunicationService
|
||||
|
||||
private IActorRef GetActor() => GetCommunicationActor();
|
||||
|
||||
/// <summary>
/// Returns the notification-outbox proxy reference.
/// </summary>
/// <returns>The proxy previously stored by <see cref="SetNotificationOutbox"/>.</returns>
/// <exception cref="InvalidOperationException">No proxy has been set yet.</exception>
private IActorRef GetNotificationOutbox()
{
    if (_notificationOutboxProxy is { } proxy)
    {
        return proxy;
    }

    throw new InvalidOperationException("CommunicationService not initialized. NotificationOutbox proxy not set.");
}
|
||||
|
||||
// ── Pattern 1: Instance Deployment ──
|
||||
|
||||
public async Task<DeploymentStatusResponse> DeployInstanceAsync(
|
||||
@@ -230,6 +251,43 @@ public class CommunicationService
|
||||
return await GetActor().Ask<RouteToSetAttributesResponse>(
|
||||
envelope, _options.IntegrationTimeout, cancellationToken);
|
||||
}
|
||||
|
||||
// ── Notification Outbox (central-local actor — Asked directly, no SiteEnvelope) ──
|
||||
|
||||
/// <summary>
/// Asks the notification-outbox proxy to run an outbox query and awaits its reply,
/// bounded by <c>QueryTimeout</c>.
/// </summary>
/// <param name="request">The outbox query to send.</param>
/// <param name="cancellationToken">Token that cancels the Ask.</param>
/// <returns>The outbox's response to the query.</returns>
/// <exception cref="InvalidOperationException">The outbox proxy has not been set.</exception>
public async Task<NotificationOutboxQueryResponse> QueryNotificationOutboxAsync(
    NotificationOutboxQueryRequest request, CancellationToken cancellationToken = default)
{
    var outbox = GetNotificationOutbox();
    var response = await outbox.Ask<NotificationOutboxQueryResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
|
||||
/// <summary>
/// Sends a retry request to the notification-outbox proxy and awaits its reply,
/// bounded by <c>QueryTimeout</c>.
/// </summary>
/// <param name="request">The retry request to send.</param>
/// <param name="cancellationToken">Token that cancels the Ask.</param>
/// <returns>The outbox's response to the retry request.</returns>
/// <exception cref="InvalidOperationException">The outbox proxy has not been set.</exception>
public async Task<RetryNotificationResponse> RetryNotificationAsync(
    RetryNotificationRequest request, CancellationToken cancellationToken = default)
{
    var outbox = GetNotificationOutbox();
    var response = await outbox.Ask<RetryNotificationResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
|
||||
/// <summary>
/// Sends a discard request to the notification-outbox proxy and awaits its reply,
/// bounded by <c>QueryTimeout</c>.
/// </summary>
/// <param name="request">The discard request to send.</param>
/// <param name="cancellationToken">Token that cancels the Ask.</param>
/// <returns>The outbox's response to the discard request.</returns>
/// <exception cref="InvalidOperationException">The outbox proxy has not been set.</exception>
public async Task<DiscardNotificationResponse> DiscardNotificationAsync(
    DiscardNotificationRequest request, CancellationToken cancellationToken = default)
{
    var outbox = GetNotificationOutbox();
    var response = await outbox.Ask<DiscardNotificationResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
|
||||
/// <summary>
/// Asks the notification-outbox proxy for the outbox KPIs and awaits its reply,
/// bounded by <c>QueryTimeout</c>.
/// </summary>
/// <param name="request">The KPI request to send.</param>
/// <param name="cancellationToken">Token that cancels the Ask.</param>
/// <returns>The outbox's KPI response.</returns>
/// <exception cref="InvalidOperationException">The outbox proxy has not been set.</exception>
public async Task<NotificationKpiResponse> GetNotificationKpisAsync(
    NotificationKpiRequest request, CancellationToken cancellationToken = default)
{
    var outbox = GetNotificationOutbox();
    var response = await outbox.Ask<NotificationKpiResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
|
||||
/// <summary>
/// Asks the notification-outbox proxy for the per-site KPI breakdown and awaits its
/// reply, bounded by <c>QueryTimeout</c>.
/// </summary>
/// <param name="request">The per-site KPI request to send.</param>
/// <param name="cancellationToken">Token that cancels the Ask.</param>
/// <returns>The outbox's per-site KPI response.</returns>
/// <exception cref="InvalidOperationException">The outbox proxy has not been set.</exception>
public async Task<PerSiteNotificationKpiResponse> GetPerSiteNotificationKpisAsync(
    PerSiteNotificationKpiRequest request, CancellationToken cancellationToken = default)
{
    var outbox = GetNotificationOutbox();
    var response = await outbox.Ask<PerSiteNotificationKpiResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -14,6 +14,11 @@ public class NotificationListConfiguration : IEntityTypeConfiguration<Notificati
|
||||
.IsRequired()
|
||||
.HasMaxLength(200);
|
||||
|
||||
builder.Property(n => n.Type)
|
||||
.HasConversion<string>()
|
||||
.HasMaxLength(32)
|
||||
.IsRequired();
|
||||
|
||||
builder.HasMany(n => n.Recipients)
|
||||
.WithOne()
|
||||
.HasForeignKey(r => r.NotificationListId)
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.EntityFrameworkCore.Metadata.Builders;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Configurations;
|
||||
|
||||
/// <summary>
/// Entity Framework Core mapping for <see cref="Notification"/>, the central notification
/// outbox entity. No length is configured for <see cref="Notification.TypeData"/> or
/// <see cref="Notification.ResolvedTargets"/>: they hold variable-length JSON / target
/// snapshots and are deliberately left unconstrained (nullable nvarchar(max)).
/// </summary>
public class NotificationOutboxConfiguration : IEntityTypeConfiguration<Notification>
{
    // Column length limits applied by Configure.
    private const int NotificationIdLength = 64;
    private const int EnumTextLength = 32;
    private const int ListNameLength = 200;
    private const int SubjectLength = 1000;
    private const int LastErrorLength = 4000;
    private const int SourceSiteIdLength = 100;
    private const int SourceInstanceIdLength = 200;
    private const int SourceScriptLength = 200;

    /// <summary>
    /// Configures the key, column constraints, enum-to-string conversions and indexes
    /// of the <see cref="Notification"/> entity.
    /// </summary>
    /// <param name="builder">Builder for the <see cref="Notification"/> entity type.</param>
    public void Configure(EntityTypeBuilder<Notification> builder)
    {
        // Primary key.
        builder.HasKey(row => row.NotificationId);
        builder.Property(row => row.NotificationId).HasMaxLength(NotificationIdLength);

        // Enum members are persisted as their string names.
        builder.Property(row => row.Type)
            .HasConversion<string>()
            .IsRequired()
            .HasMaxLength(EnumTextLength);

        builder.Property(row => row.Status)
            .HasConversion<string>()
            .IsRequired()
            .HasMaxLength(EnumTextLength);

        // Message content.
        builder.Property(row => row.ListName)
            .IsRequired()
            .HasMaxLength(ListNameLength);

        builder.Property(row => row.Subject)
            .IsRequired()
            .HasMaxLength(SubjectLength);

        builder.Property(row => row.Body).IsRequired();

        builder.Property(row => row.LastError).HasMaxLength(LastErrorLength);

        // Origin of the notification.
        builder.Property(row => row.SourceSiteId)
            .IsRequired()
            .HasMaxLength(SourceSiteIdLength);

        builder.Property(row => row.SourceInstanceId).HasMaxLength(SourceInstanceIdLength);

        builder.Property(row => row.SourceScript).HasMaxLength(SourceScriptLength);

        // Composite indexes.
        builder.HasIndex(row => new { row.Status, row.NextAttemptAt });

        builder.HasIndex(row => new { row.SourceSiteId, row.CreatedAt });
    }
}
|
||||
Generated
+1436
File diff suppressed because it is too large
Load Diff
+72
@@ -0,0 +1,72 @@
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Migrations
{
    /// <summary>
    /// Migration that adds the <c>Type</c> column to <c>NotificationLists</c> and creates
    /// the <c>Notifications</c> table together with its two composite indexes.
    /// </summary>
    public partial class AddNotificationsTable : Migration
    {
        /// <summary>
        /// Applies the migration: adds <c>NotificationLists.Type</c> (defaulting to
        /// <c>"Email"</c>), creates <c>Notifications</c>, then creates its indexes.
        /// </summary>
        /// <param name="migrationBuilder">Builder that records the schema operations.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.AddColumn<string>(
                name: "Type",
                table: "NotificationLists",
                type: "nvarchar(32)",
                maxLength: 32,
                nullable: false,
                defaultValue: "Email");

            migrationBuilder.CreateTable(
                name: "Notifications",
                columns: t => new
                {
                    NotificationId = t.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
                    Type = t.Column<string>(type: "nvarchar(32)", maxLength: 32, nullable: false),
                    ListName = t.Column<string>(type: "nvarchar(200)", maxLength: 200, nullable: false),
                    Subject = t.Column<string>(type: "nvarchar(1000)", maxLength: 1000, nullable: false),
                    Body = t.Column<string>(type: "nvarchar(max)", nullable: false),
                    TypeData = t.Column<string>(type: "nvarchar(max)", nullable: true),
                    Status = t.Column<string>(type: "nvarchar(32)", maxLength: 32, nullable: false),
                    RetryCount = t.Column<int>(type: "int", nullable: false),
                    LastError = t.Column<string>(type: "nvarchar(4000)", maxLength: 4000, nullable: true),
                    ResolvedTargets = t.Column<string>(type: "nvarchar(max)", nullable: true),
                    SourceSiteId = t.Column<string>(type: "nvarchar(100)", maxLength: 100, nullable: false),
                    SourceInstanceId = t.Column<string>(type: "nvarchar(200)", maxLength: 200, nullable: true),
                    SourceScript = t.Column<string>(type: "nvarchar(200)", maxLength: 200, nullable: true),
                    SiteEnqueuedAt = t.Column<DateTimeOffset>(type: "datetimeoffset", nullable: false),
                    CreatedAt = t.Column<DateTimeOffset>(type: "datetimeoffset", nullable: false),
                    LastAttemptAt = t.Column<DateTimeOffset>(type: "datetimeoffset", nullable: true),
                    NextAttemptAt = t.Column<DateTimeOffset>(type: "datetimeoffset", nullable: true),
                    DeliveredAt = t.Column<DateTimeOffset>(type: "datetimeoffset", nullable: true)
                },
                constraints: t =>
                {
                    t.PrimaryKey("PK_Notifications", row => row.NotificationId);
                });

            migrationBuilder.CreateIndex(
                name: "IX_Notifications_SourceSiteId_CreatedAt",
                table: "Notifications",
                columns: new[] { "SourceSiteId", "CreatedAt" });

            migrationBuilder.CreateIndex(
                name: "IX_Notifications_Status_NextAttemptAt",
                table: "Notifications",
                columns: new[] { "Status", "NextAttemptAt" });
        }

        /// <summary>
        /// Reverts the migration: drops <c>Notifications</c>, then removes
        /// <c>NotificationLists.Type</c>.
        /// </summary>
        /// <param name="migrationBuilder">Builder that records the schema operations.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropTable(name: "Notifications");

            migrationBuilder.DropColumn(
                name: "Type",
                table: "NotificationLists");
        }
    }
}
|
||||
@@ -566,6 +566,86 @@ namespace ScadaLink.ConfigurationDatabase.Migrations
|
||||
b.ToTable("InstanceConnectionBindings");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ScadaLink.Commons.Entities.Notifications.Notification", b =>
|
||||
{
|
||||
b.Property<string>("NotificationId")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("Body")
|
||||
.IsRequired()
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<DateTimeOffset>("CreatedAt")
|
||||
.HasColumnType("datetimeoffset");
|
||||
|
||||
b.Property<DateTimeOffset?>("DeliveredAt")
|
||||
.HasColumnType("datetimeoffset");
|
||||
|
||||
b.Property<DateTimeOffset?>("LastAttemptAt")
|
||||
.HasColumnType("datetimeoffset");
|
||||
|
||||
b.Property<string>("LastError")
|
||||
.HasMaxLength(4000)
|
||||
.HasColumnType("nvarchar(4000)");
|
||||
|
||||
b.Property<string>("ListName")
|
||||
.IsRequired()
|
||||
.HasMaxLength(200)
|
||||
.HasColumnType("nvarchar(200)");
|
||||
|
||||
b.Property<DateTimeOffset?>("NextAttemptAt")
|
||||
.HasColumnType("datetimeoffset");
|
||||
|
||||
b.Property<string>("ResolvedTargets")
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<int>("RetryCount")
|
||||
.HasColumnType("int");
|
||||
|
||||
b.Property<DateTimeOffset>("SiteEnqueuedAt")
|
||||
.HasColumnType("datetimeoffset");
|
||||
|
||||
b.Property<string>("SourceInstanceId")
|
||||
.HasMaxLength(200)
|
||||
.HasColumnType("nvarchar(200)");
|
||||
|
||||
b.Property<string>("SourceScript")
|
||||
.HasMaxLength(200)
|
||||
.HasColumnType("nvarchar(200)");
|
||||
|
||||
b.Property<string>("SourceSiteId")
|
||||
.IsRequired()
|
||||
.HasMaxLength(100)
|
||||
.HasColumnType("nvarchar(100)");
|
||||
|
||||
b.Property<string>("Status")
|
||||
.IsRequired()
|
||||
.HasMaxLength(32)
|
||||
.HasColumnType("nvarchar(32)");
|
||||
|
||||
b.Property<string>("Subject")
|
||||
.IsRequired()
|
||||
.HasMaxLength(1000)
|
||||
.HasColumnType("nvarchar(1000)");
|
||||
|
||||
b.Property<string>("Type")
|
||||
.IsRequired()
|
||||
.HasMaxLength(32)
|
||||
.HasColumnType("nvarchar(32)");
|
||||
|
||||
b.Property<string>("TypeData")
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.HasKey("NotificationId");
|
||||
|
||||
b.HasIndex("SourceSiteId", "CreatedAt");
|
||||
|
||||
b.HasIndex("Status", "NextAttemptAt");
|
||||
|
||||
b.ToTable("Notifications");
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ScadaLink.Commons.Entities.Notifications.NotificationList", b =>
|
||||
{
|
||||
b.Property<int>("Id")
|
||||
@@ -579,6 +659,11 @@ namespace ScadaLink.ConfigurationDatabase.Migrations
|
||||
.HasMaxLength(200)
|
||||
.HasColumnType("nvarchar(200)");
|
||||
|
||||
b.Property<string>("Type")
|
||||
.IsRequired()
|
||||
.HasMaxLength(32)
|
||||
.HasColumnType("nvarchar(32)");
|
||||
|
||||
b.HasKey("Id");
|
||||
|
||||
b.HasIndex("Name")
|
||||
|
||||
@@ -0,0 +1,241 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Repositories;
|
||||
|
||||
/// <summary>
|
||||
/// EF Core data access for the central notification outbox. See
|
||||
/// <see cref="INotificationOutboxRepository"/> for the behaviour contract.
|
||||
/// </summary>
|
||||
public class NotificationOutboxRepository : INotificationOutboxRepository
|
||||
{
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
|
||||
// Statuses that represent a finished notification lifecycle. Non-terminal is the complement.
|
||||
private static readonly NotificationStatus[] TerminalStatuses =
|
||||
{
|
||||
NotificationStatus.Delivered,
|
||||
NotificationStatus.Parked,
|
||||
NotificationStatus.Discarded,
|
||||
};
|
||||
|
||||
/// <summary>Creates a repository that reads and writes through <paramref name="context"/>.</summary>
/// <param name="context">The database context to use; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
public NotificationOutboxRepository(ScadaLinkDbContext context)
{
    if (context is null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    _context = context;
}
|
||||
|
||||
/// <summary>
/// Inserts <paramref name="n"/> unless a row with the same <c>NotificationId</c> already exists.
/// </summary>
/// <param name="n">The notification to insert; must not be <c>null</c>.</param>
/// <param name="cancellationToken">Token that cancels the database calls.</param>
/// <returns>
/// <c>true</c> when the row was inserted; <c>false</c> when a row with that id was already
/// present, including when another writer inserted it concurrently.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="n"/> is <c>null</c>.</exception>
/// <exception cref="DbUpdateException">The save failed for a reason other than a duplicate id.</exception>
public async Task<bool> InsertIfNotExistsAsync(Notification n, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(n);

    var exists = await _context.Notifications
        .AnyAsync(x => x.NotificationId == n.NotificationId, cancellationToken);
    if (exists)
    {
        return false;
    }

    await _context.Notifications.AddAsync(n, cancellationToken);

    try
    {
        await _context.SaveChangesAsync(cancellationToken);
        return true;
    }
    catch (DbUpdateException)
    {
        // Stop tracking the failed insert so a later SaveChanges on this context
        // does not attempt it again.
        _context.Entry(n).State = EntityState.Detached;

        // The existence check and the insert are not atomic: another writer may
        // have inserted the same id in between. In that case the row exists, which
        // is the "already present" outcome rather than a fault.
        var insertedConcurrently = await _context.Notifications
            .AsNoTracking()
            .AnyAsync(x => x.NotificationId == n.NotificationId, cancellationToken);
        if (insertedConcurrently)
        {
            return false;
        }

        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Returns up to <paramref name="batchSize"/> notifications that are due: every Pending
/// row, plus every Retrying row whose <c>NextAttemptAt</c> is set and is at or before
/// <paramref name="now"/>. Rows are returned oldest <c>CreatedAt</c> first.
/// </summary>
/// <param name="now">The instant against which <c>NextAttemptAt</c> is compared.</param>
/// <param name="batchSize">Maximum number of rows to return.</param>
/// <param name="cancellationToken">Token that cancels the query.</param>
/// <returns>The due notifications, ordered by ascending <c>CreatedAt</c>.</returns>
public async Task<IReadOnlyList<Notification>> GetDueAsync(
    DateTimeOffset now, int batchSize, CancellationToken cancellationToken = default)
{
    var dueOldestFirst =
        from candidate in _context.Notifications
        where candidate.Status == NotificationStatus.Pending
            || (candidate.Status == NotificationStatus.Retrying
                && candidate.NextAttemptAt != null
                && candidate.NextAttemptAt <= now)
        orderby candidate.CreatedAt
        select candidate;

    return await dueOldestFirst
        .Take(batchSize)
        .ToListAsync(cancellationToken);
}
|
||||
|
||||
/// <summary>Looks up a notification by its primary key.</summary>
/// <param name="notificationId">The <c>NotificationId</c> to find.</param>
/// <param name="cancellationToken">Token that cancels the lookup.</param>
/// <returns>The matching notification, or <c>null</c> when none is found.</returns>
public async Task<Notification?> GetByIdAsync(string notificationId, CancellationToken cancellationToken = default)
{
    object[] primaryKey = { notificationId };
    var match = await _context.Notifications.FindAsync(primaryKey, cancellationToken);
    return match;
}
|
||||
|
||||
/// <summary>
/// Passes <paramref name="n"/> to the notification set's <c>Update</c> and saves the
/// context's pending changes.
/// </summary>
/// <param name="n">The notification carrying the values to persist.</param>
/// <param name="cancellationToken">Token that cancels the save.</param>
public async Task UpdateAsync(Notification n, CancellationToken cancellationToken = default)
{
    var notifications = _context.Notifications;
    notifications.Update(n);

    await _context.SaveChangesAsync(cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Returns one page of notifications matching <paramref name="filter"/>, newest
/// <c>CreatedAt</c> first, together with the total number of matching rows.
/// </summary>
/// <param name="filter">
/// Filter criteria; unset members place no constraint. The stuck restriction is applied
/// only when <c>StuckOnly</c> is <c>true</c> and <c>StuckCutoff</c> has a value.
/// </param>
/// <param name="pageNumber">1-based page number.</param>
/// <param name="pageSize">Number of rows per page; must be at least 1.</param>
/// <param name="cancellationToken">Token that cancels the queries.</param>
/// <returns>The rows of the requested page and the total count across all pages.</returns>
/// <exception cref="ArgumentNullException"><paramref name="filter"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="pageNumber"/> or <paramref name="pageSize"/> is less than 1.
/// </exception>
public async Task<(IReadOnlyList<Notification> Rows, int TotalCount)> QueryAsync(
    NotificationOutboxFilter filter, int pageNumber, int pageSize, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(filter);

    // Reject invalid paging up front: a negative offset or an empty fetch would
    // otherwise only fail later, inside the database provider.
    if (pageNumber < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(pageNumber), pageNumber, "Page number must be 1 or greater.");
    }

    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(pageSize), pageSize, "Page size must be 1 or greater.");
    }

    var query = _context.Notifications.AsQueryable();

    if (filter.Status is { } status)
    {
        query = query.Where(n => n.Status == status);
    }

    if (filter.Type is { } type)
    {
        query = query.Where(n => n.Type == type);
    }

    if (!string.IsNullOrEmpty(filter.SourceSiteId))
    {
        query = query.Where(n => n.SourceSiteId == filter.SourceSiteId);
    }

    if (!string.IsNullOrEmpty(filter.ListName))
    {
        query = query.Where(n => n.ListName == filter.ListName);
    }

    if (!string.IsNullOrEmpty(filter.SubjectKeyword))
    {
        query = query.Where(n => n.Subject.Contains(filter.SubjectKeyword));
    }

    // Without a cutoff there is nothing to compare CreatedAt against, so the
    // stuck restriction is skipped entirely.
    if (filter.StuckOnly && filter.StuckCutoff is { } stuckCutoff)
    {
        query = query.Where(n =>
            (n.Status == NotificationStatus.Pending || n.Status == NotificationStatus.Retrying)
            && n.CreatedAt < stuckCutoff);
    }

    if (filter.From is { } from)
    {
        query = query.Where(n => n.CreatedAt >= from);
    }

    if (filter.To is { } to)
    {
        query = query.Where(n => n.CreatedAt <= to);
    }

    var totalCount = await query.CountAsync(cancellationToken);

    // CreatedAt alone is not unique. The primary key is added as a tie-breaker so
    // rows with equal timestamps keep a fixed order and cannot repeat or go
    // missing across pages.
    var rows = await query
        .OrderByDescending(n => n.CreatedAt)
        .ThenByDescending(n => n.NotificationId)
        .Skip((pageNumber - 1) * pageSize)
        .Take(pageSize)
        .ToListAsync(cancellationToken);

    return (rows, totalCount);
}
|
||||
|
||||
/// <summary>
/// Deletes every notification whose status is one of <c>TerminalStatuses</c> and whose
/// <c>CreatedAt</c> is earlier than <paramref name="cutoff"/>.
/// </summary>
/// <param name="cutoff">Rows created before this instant are eligible for deletion.</param>
/// <param name="cancellationToken">Token that cancels the delete.</param>
/// <returns>The number of rows deleted.</returns>
public async Task<int> DeleteTerminalOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
{
    var expired = _context.Notifications
        .Where(n => TerminalStatuses.Contains(n.Status) && n.CreatedAt < cutoff);

    var deletedCount = await expired.ExecuteDeleteAsync(cancellationToken);
    return deletedCount;
}
|
||||
|
||||
/// <summary>
/// Computes the outbox-wide KPI snapshot: queue depth, stuck count, parked count,
/// recently delivered count and the age of the oldest non-terminal row.
/// </summary>
/// <param name="stuckCutoff">Non-terminal rows created before this instant count as stuck.</param>
/// <param name="deliveredSince">Delivered rows with <c>DeliveredAt</c> at or after this instant are counted.</param>
/// <param name="cancellationToken">Token that cancels the queries.</param>
/// <returns>The computed snapshot; <c>OldestPendingAge</c> is <c>null</c> when no row is non-terminal.</returns>
public async Task<NotificationKpiSnapshot> ComputeKpisAsync(
    DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default)
{
    // Captured once, before the queries, and used as the reference for the age calculation.
    var now = DateTimeOffset.UtcNow;

    // Non-terminal rows (Pending or Retrying); shared by three of the metrics below.
    var nonTerminal = _context.Notifications
        .Where(n => n.Status == NotificationStatus.Pending
                 || n.Status == NotificationStatus.Retrying);

    var queueDepth = await nonTerminal.CountAsync(cancellationToken);

    var stuckCount = await nonTerminal
        .CountAsync(n => n.CreatedAt < stuckCutoff, cancellationToken);

    var parkedCount = await _context.Notifications
        .CountAsync(n => n.Status == NotificationStatus.Parked, cancellationToken);

    var deliveredLastInterval = await _context.Notifications
        .CountAsync(n => n.Status == NotificationStatus.Delivered
                      && n.DeliveredAt != null
                      && n.DeliveredAt >= deliveredSince, cancellationToken);

    // Oldest non-terminal CreatedAt. The DateTimeOffset value converter makes a SQL
    // Min aggregate awkward, so order ascending and take the first instead. The
    // nullable projection lets a single query also cover the "no rows" case: a
    // separate existence check costs a round trip and can throw if the last
    // non-terminal row leaves the set between the check and the fetch.
    var oldestCreatedAt = await nonTerminal
        .OrderBy(n => n.CreatedAt)
        .Select(n => (DateTimeOffset?)n.CreatedAt)
        .FirstOrDefaultAsync(cancellationToken);

    TimeSpan? oldestPendingAge = oldestCreatedAt is { } createdAt
        ? now - createdAt
        : null;

    return new NotificationKpiSnapshot(
        QueueDepth: queueDepth,
        StuckCount: stuckCount,
        ParkedCount: parkedCount,
        DeliveredLastInterval: deliveredLastInterval,
        OldestPendingAge: oldestPendingAge);
}
|
||||
|
||||
/// <summary>
/// Computes one KPI snapshot per source site. A site appears in the result when at
/// least one of its rows contributes to queue depth, stuck, parked or recently
/// delivered counts. Sites are ordered by ordinal comparison of their identifier.
/// </summary>
/// <param name="stuckCutoff">Non-terminal rows created before this instant count as stuck.</param>
/// <param name="deliveredSince">Delivered rows with <c>DeliveredAt</c> at or after this instant are counted.</param>
/// <param name="cancellationToken">Token that cancels the queries.</param>
/// <returns>The per-site snapshots, sorted by site identifier.</returns>
public async Task<IReadOnlyList<SiteNotificationKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default)
{
    var capturedAt = DateTimeOffset.UtcNow;

    var depthBySite = await CountBySiteAsync(
        n => n.Status == NotificationStatus.Pending || n.Status == NotificationStatus.Retrying,
        cancellationToken);

    var stuckBySite = await CountBySiteAsync(
        n => (n.Status == NotificationStatus.Pending || n.Status == NotificationStatus.Retrying)
             && n.CreatedAt < stuckCutoff,
        cancellationToken);

    var parkedBySite = await CountBySiteAsync(
        n => n.Status == NotificationStatus.Parked, cancellationToken);

    var deliveredBySite = await CountBySiteAsync(
        n => n.Status == NotificationStatus.Delivered
             && n.DeliveredAt != null && n.DeliveredAt >= deliveredSince,
        cancellationToken);

    // Oldest non-terminal CreatedAt per site. A SQL Min over the DateTimeOffset
    // converter is awkward, so only the (site, created) pairs of the non-terminal
    // rows (the live queue, which stays bounded) are fetched and the minimum is
    // found in memory.
    var inFlight = await _context.Notifications
        .Where(n => n.Status == NotificationStatus.Pending
                 || n.Status == NotificationStatus.Retrying)
        .Select(n => new { n.SourceSiteId, n.CreatedAt })
        .ToListAsync(cancellationToken);

    var oldestBySite = new Dictionary<string, DateTimeOffset>();
    foreach (var row in inFlight)
    {
        if (!oldestBySite.TryGetValue(row.SourceSiteId, out var earliestSoFar)
            || row.CreatedAt < earliestSoFar)
        {
            oldestBySite[row.SourceSiteId] = row.CreatedAt;
        }
    }

    // Distinct site ids across all four count maps, in ordinal order.
    var sites = new SortedSet<string>(StringComparer.Ordinal);
    sites.UnionWith(depthBySite.Keys);
    sites.UnionWith(stuckBySite.Keys);
    sites.UnionWith(parkedBySite.Keys);
    sites.UnionWith(deliveredBySite.Keys);

    var snapshots = new List<SiteNotificationKpiSnapshot>(sites.Count);
    foreach (var site in sites)
    {
        TimeSpan? oldestAge = oldestBySite.TryGetValue(site, out var createdAt)
            ? capturedAt - createdAt
            : null;

        snapshots.Add(new SiteNotificationKpiSnapshot(
            SourceSiteId: site,
            QueueDepth: depthBySite.GetValueOrDefault(site),
            StuckCount: stuckBySite.GetValueOrDefault(site),
            ParkedCount: parkedBySite.GetValueOrDefault(site),
            DeliveredLastInterval: deliveredBySite.GetValueOrDefault(site),
            OldestPendingAge: oldestAge));
    }

    return snapshots;
}
|
||||
|
||||
/// <summary>
/// Counts the notification rows that satisfy <paramref name="predicate"/>, bucketed by
/// their <c>SourceSiteId</c>.
/// </summary>
/// <param name="predicate">Row filter applied before grouping.</param>
/// <param name="cancellationToken">Token forwarded to the query.</param>
/// <returns>A map from site id to the number of matching rows for that site.</returns>
private async Task<Dictionary<string, int>> CountBySiteAsync(
    System.Linq.Expressions.Expression<Func<Notification, bool>> predicate,
    CancellationToken cancellationToken)
{
    var countsPerSite = _context.Notifications
        .Where(predicate)
        .GroupBy(n => n.SourceSiteId)
        .Select(g => new { Site = g.Key, Count = g.Count() });

    var result = await countsPerSite.ToDictionaryAsync(
        entry => entry.Site, entry => entry.Count, cancellationToken);
    return result;
}
|
||||
|
||||
/// <summary>Forwards to the context's <c>SaveChangesAsync</c> and returns its result.</summary>
/// <param name="cancellationToken">Token forwarded to the context.</param>
public async Task<int> SaveChangesAsync(CancellationToken cancellationToken = default)
{
    var written = await _context.SaveChangesAsync(cancellationToken);
    return written;
}
|
||||
}
|
||||
@@ -69,6 +69,7 @@ public class ScadaLinkDbContext : DbContext, IDataProtectionKeyContext
|
||||
public DbSet<NotificationList> NotificationLists => Set<NotificationList>();
|
||||
public DbSet<NotificationRecipient> NotificationRecipients => Set<NotificationRecipient>();
|
||||
public DbSet<SmtpConfiguration> SmtpConfigurations => Set<SmtpConfiguration>();
|
||||
public DbSet<Notification> Notifications => Set<Notification>();
|
||||
|
||||
// Scripts
|
||||
public DbSet<SharedScript> SharedScripts => Set<SharedScript>();
|
||||
|
||||
@@ -45,6 +45,7 @@ public static class ServiceCollectionExtensions
|
||||
services.AddScoped<ISiteRepository, SiteRepository>();
|
||||
services.AddScoped<IExternalSystemRepository, ExternalSystemRepository>();
|
||||
services.AddScoped<INotificationRepository, NotificationRepository>();
|
||||
services.AddScoped<INotificationOutboxRepository, NotificationOutboxRepository>();
|
||||
services.AddScoped<IInboundApiRepository, InboundApiRepository>();
|
||||
services.AddScoped<IAuditService, AuditService>();
|
||||
services.AddScoped<IInstanceLocator, InstanceLocator>();
|
||||
|
||||
@@ -260,6 +260,40 @@ akka {{
|
||||
mgmtHolder.ActorRef = mgmtActor;
|
||||
_logger.LogInformation("ManagementActor registered with ClusterClientReceptionist");
|
||||
|
||||
// Notification Outbox — cluster singleton so exactly one node owns ingest,
|
||||
// the dispatch sweep and the purge loop. Central actors run on the base
|
||||
// "Central" role, so the singleton settings are NOT role-scoped (unlike the
|
||||
// site singletons, which are scoped to a per-site role).
|
||||
var outboxOptions = _serviceProvider
|
||||
.GetRequiredService<IOptions<ScadaLink.NotificationOutbox.NotificationOutboxOptions>>().Value;
|
||||
var outboxLogger = _serviceProvider.GetRequiredService<ILoggerFactory>()
|
||||
.CreateLogger<ScadaLink.NotificationOutbox.NotificationOutboxActor>();
|
||||
|
||||
var outboxSingletonProps = ClusterSingletonManager.Props(
|
||||
singletonProps: Props.Create(() => new ScadaLink.NotificationOutbox.NotificationOutboxActor(
|
||||
_serviceProvider,
|
||||
outboxOptions,
|
||||
outboxLogger)),
|
||||
terminationMessage: PoisonPill.Instance,
|
||||
settings: ClusterSingletonManagerSettings.Create(_actorSystem!)
|
||||
.WithSingletonName("notification-outbox"));
|
||||
_actorSystem!.ActorOf(outboxSingletonProps, "notification-outbox-singleton");
|
||||
|
||||
var outboxProxyProps = ClusterSingletonProxy.Props(
|
||||
singletonManagerPath: "/user/notification-outbox-singleton",
|
||||
settings: ClusterSingletonProxySettings.Create(_actorSystem)
|
||||
.WithSingletonName("notification-outbox"));
|
||||
var outboxProxy = _actorSystem.ActorOf(outboxProxyProps, "notification-outbox-proxy");
|
||||
|
||||
// Hand the outbox proxy to the CentralCommunicationActor so forwarded
|
||||
// NotificationSubmit messages from sites are routed to the outbox singleton.
|
||||
centralCommActor.Tell(new RegisterNotificationOutbox(outboxProxy));
|
||||
|
||||
// Hand the same proxy to the CommunicationService so the Central UI can
|
||||
// Ask the outbox actor directly (query, retry, discard, KPIs).
|
||||
commService?.SetNotificationOutbox(outboxProxy);
|
||||
_logger.LogInformation("NotificationOutbox singleton created and registered with CentralCommunicationActor");
|
||||
|
||||
_logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
|
||||
}
|
||||
|
||||
@@ -422,15 +456,18 @@ akka {{
|
||||
.GetRequiredService<ScadaLink.ExternalSystemGateway.DatabaseGateway>()
|
||||
.DeliverBufferedAsync(msg);
|
||||
});
|
||||
// Notification Outbox: a buffered notification is no longer delivered by
|
||||
// the site over SMTP. "Delivering" it means forwarding it to the central
|
||||
// cluster via the SiteCommunicationActor and treating central's
|
||||
// NotificationSubmitAck as the outcome (accepted → delivered; not accepted
|
||||
// or timeout → throw → transient → keep buffering). Central owns SMTP.
|
||||
var notificationForwarder = new ScadaLink.StoreAndForward.NotificationForwarder(
|
||||
siteCommActor,
|
||||
_nodeOptions.SiteId!,
|
||||
_communicationOptions.NotificationForwardTimeout);
|
||||
storeAndForwardService.RegisterDeliveryHandler(
|
||||
ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification,
|
||||
async msg =>
|
||||
{
|
||||
using var scope = _serviceProvider.CreateScope();
|
||||
return await scope.ServiceProvider
|
||||
.GetRequiredService<ScadaLink.NotificationService.NotificationDeliveryService>()
|
||||
.DeliverBufferedAsync(msg);
|
||||
});
|
||||
notificationForwarder.DeliverAsync);
|
||||
_logger.LogInformation(
|
||||
"Store-and-forward delivery handlers registered (ExternalSystem, CachedDbWrite, Notification)");
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ using ScadaLink.Host.Actors;
|
||||
using ScadaLink.Host.Health;
|
||||
using ScadaLink.InboundAPI;
|
||||
using ScadaLink.ManagementService;
|
||||
using ScadaLink.NotificationOutbox;
|
||||
using ScadaLink.NotificationService;
|
||||
using ScadaLink.Security;
|
||||
using ScadaLink.TemplateEngine;
|
||||
@@ -72,6 +73,10 @@ try
|
||||
builder.Services.AddNotificationService();
|
||||
|
||||
// Central-only components
|
||||
// Notification Outbox: central owns SMTP delivery; the Email adapter reuses the
|
||||
// AddNotificationService() SMTP machinery above. AddNotificationOutbox binds
|
||||
// NotificationOutboxOptions via BindConfiguration, so no explicit Configure is needed.
|
||||
builder.Services.AddNotificationOutbox();
|
||||
builder.Services.AddTemplateEngine();
|
||||
builder.Services.AddDeploymentManager();
|
||||
builder.Services.AddSecurity();
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.ExternalSystemGateway/ScadaLink.ExternalSystemGateway.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.NotificationService/ScadaLink.NotificationService.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.NotificationOutbox/ScadaLink.NotificationOutbox.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.CentralUI/ScadaLink.CentralUI.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.Security/ScadaLink.Security.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||
|
||||
@@ -25,7 +25,12 @@ public static class SiteServiceRegistration
|
||||
services.AddCommunication();
|
||||
services.AddSiteHealthMonitoring();
|
||||
services.AddExternalSystemGateway();
|
||||
services.AddNotificationService();
|
||||
// AddNotificationService() is intentionally NOT registered on the site path.
|
||||
// Sites no longer deliver notifications over SMTP — a buffered notification is
|
||||
// forwarded to the central cluster (via NotificationForwarder / SiteCommunicationActor),
|
||||
// and central owns SMTP delivery through the Notification Outbox. The SMTP machinery
|
||||
// (OAuth2TokenService, ISmtpClientWrapper, INotificationDeliveryService) has no
|
||||
// consumer on a site node.
|
||||
|
||||
// Health report transport: sends SiteHealthReport to SiteCommunicationActor via Akka
|
||||
services.AddSingleton<ISiteIdentityProvider, SiteIdentityProvider>();
|
||||
|
||||
@@ -52,6 +52,14 @@
|
||||
"AuthMode": "None",
|
||||
"FromAddress": "scada-notifications@company.com"
|
||||
},
|
||||
"NotificationOutbox": {
|
||||
"DispatchInterval": "00:00:10",
|
||||
"DispatchBatchSize": 100,
|
||||
"StuckAgeThreshold": "00:10:00",
|
||||
"TerminalRetention": "365.00:00:00",
|
||||
"PurgeInterval": "1.00:00:00",
|
||||
"DeliveredKpiWindow": "00:01:00"
|
||||
},
|
||||
"Logging": {
|
||||
"MinimumLevel": "Information"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
namespace ScadaLink.NotificationOutbox.Delivery;
|
||||
|
||||
/// <summary>
/// How a single delivery attempt ended. A transient failure may be retried; a permanent
/// failure is terminal and is not retried.
/// </summary>
public enum DeliveryResult
{
    /// <summary>Delivery succeeded.</summary>
    Success = 0,

    /// <summary>Delivery failed for a reason that a later retry might overcome.</summary>
    TransientFailure = 1,

    /// <summary>Delivery failed for a reason that retrying cannot fix.</summary>
    PermanentFailure = 2
}
|
||||
|
||||
/// <summary>
/// What an <see cref="INotificationDeliveryAdapter"/> reports after one delivery attempt.
/// </summary>
/// <param name="Result">Classification of the attempt.</param>
/// <param name="ResolvedTargets">
/// The concrete targets the notification was sent to, kept for audit. The factory
/// methods populate it only for a successful outcome.
/// </param>
/// <param name="Error">
/// Human-readable failure text. The factory methods populate it only for a failed outcome.
/// </param>
public record DeliveryOutcome(DeliveryResult Result, string? ResolvedTargets, string? Error)
{
    /// <summary>Builds a success outcome that records the resolved targets.</summary>
    public static DeliveryOutcome Success(string resolvedTargets)
    {
        return new DeliveryOutcome(DeliveryResult.Success, resolvedTargets, null);
    }

    /// <summary>Builds a transient-failure outcome that records the error text.</summary>
    public static DeliveryOutcome Transient(string error)
    {
        return new DeliveryOutcome(DeliveryResult.TransientFailure, null, error);
    }

    /// <summary>Builds a permanent-failure outcome that records the error text.</summary>
    public static DeliveryOutcome Permanent(string error)
    {
        return new DeliveryOutcome(DeliveryResult.PermanentFailure, null, error);
    }
}
|
||||
@@ -0,0 +1,218 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.NotificationService;
|
||||
|
||||
namespace ScadaLink.NotificationOutbox.Delivery;
|
||||
|
||||
/// <summary>
/// Email channel adapter for the central notification outbox (Task 12).
///
/// Built on the <see cref="ScadaLink.NotificationService"/> SMTP pieces —
/// <see cref="ISmtpClientWrapper"/>, <see cref="SmtpTlsModeParser"/>,
/// <see cref="OAuth2TokenService"/> and the typed <see cref="SmtpPermanentException"/>.
/// Each attempt is reported as a three-way <see cref="DeliveryOutcome"/>
/// (success / permanent failure / transient failure) so the outbox can tell a terminal
/// failure apart from one worth retrying.
/// </summary>
public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdapter
{
    private readonly INotificationRepository _repository;
    private readonly Func<ISmtpClientWrapper> _smtpClientFactory;
    private readonly OAuth2TokenService? _tokenService;
    private readonly ILogger<EmailNotificationDeliveryAdapter> _logger;
    private readonly NotificationOptions _options;

    /// <summary>Creates the adapter.</summary>
    /// <param name="repository">Source of notification lists, recipients and SMTP configuration.</param>
    /// <param name="smtpClientFactory">Produces a fresh SMTP client for each send.</param>
    /// <param name="logger">Logger for delivery failures.</param>
    /// <param name="tokenService">Optional OAuth2 token provider, used when the SMTP auth type is "oauth2".</param>
    /// <param name="options">
    /// Optional fallback values; a default <see cref="NotificationOptions"/> is used when omitted.
    /// </param>
    public EmailNotificationDeliveryAdapter(
        INotificationRepository repository,
        Func<ISmtpClientWrapper> smtpClientFactory,
        ILogger<EmailNotificationDeliveryAdapter> logger,
        OAuth2TokenService? tokenService = null,
        IOptions<NotificationOptions>? options = null)
    {
        _repository = repository;
        _smtpClientFactory = smtpClientFactory;
        _logger = logger;
        _tokenService = tokenService;

        // NotificationOptions supplies the connection timeout used when the stored
        // SmtpConfiguration row carries a non-positive value.
        _options = options?.Value ?? new NotificationOptions();
    }

    /// <inheritdoc />
    public NotificationType Type => NotificationType.Email;

    /// <inheritdoc />
    public async Task<DeliveryOutcome> DeliverAsync(
        Notification notification, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(notification);

        // --- Pre-flight checks: each failure here is reported as permanent. ---

        var list = await _repository.GetListByNameAsync(notification.ListName, cancellationToken);
        if (list is null)
        {
            return DeliveryOutcome.Permanent(
                $"Notification list '{notification.ListName}' not found");
        }

        var recipients = await _repository.GetRecipientsByListIdAsync(list.Id, cancellationToken);
        if (recipients.Count == 0)
        {
            return DeliveryOutcome.Permanent(
                $"Notification list '{notification.ListName}' has no recipients");
        }

        var allConfigs = await _repository.GetAllSmtpConfigurationsAsync(cancellationToken);
        var smtpConfig = allConfigs.FirstOrDefault();
        if (smtpConfig is null)
        {
            return DeliveryOutcome.Permanent("No SMTP configuration available");
        }

        // An unrecognised TLS mode is a configuration error; retrying cannot fix it.
        SmtpTlsMode tlsMode;
        try
        {
            tlsMode = SmtpTlsModeParser.Parse(smtpConfig.TlsMode);
        }
        catch (ArgumentException badMode)
        {
            _logger.LogError(
                "Email notification to list '{List}' has an invalid SMTP TLS mode: {Reason}",
                notification.ListName, badMode.Message);
            return DeliveryOutcome.Permanent(badMode.Message);
        }

        // A malformed sender or recipient address will not become valid on retry either.
        var addressProblem = EmailAddressValidator.ValidateAddresses(
            smtpConfig.FromAddress, recipients);
        if (addressProblem != null)
        {
            _logger.LogWarning(
                "Email notification to list '{List}' has invalid addresses: {Reason}",
                notification.ListName, addressProblem);
            return DeliveryOutcome.Permanent(addressProblem);
        }

        var targets = recipients.Select(r => r.EmailAddress).ToList();

        return await SendAndClassifyAsync(notification, smtpConfig, tlsMode, targets, cancellationToken);
    }

    /// <summary>
    /// Performs the SMTP send and converts its result, or the exception it raised, into a
    /// <see cref="DeliveryOutcome"/>. A cancellation requested through
    /// <paramref name="cancellationToken"/> is rethrown rather than classified.
    /// </summary>
    private async Task<DeliveryOutcome> SendAndClassifyAsync(
        Notification notification,
        SmtpConfiguration smtpConfig,
        SmtpTlsMode tlsMode,
        IReadOnlyList<string> targets,
        CancellationToken cancellationToken)
    {
        try
        {
            await SendAsync(smtpConfig, tlsMode, targets,
                notification.Subject, notification.Body, cancellationToken);

            return DeliveryOutcome.Success(string.Join(", ", targets));
        }
        catch (SmtpPermanentException permanent)
        {
            // Typed permanent failure raised by SendAsync — never retried.
            var scrubbed = CredentialRedactor.Scrub(permanent.Message, smtpConfig.Credentials);
            _logger.LogError(
                "Permanent SMTP failure delivering email to list '{List}': {Detail}",
                notification.ListName, scrubbed);
            return DeliveryOutcome.Permanent(scrubbed);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-requested cancellation is neither a success nor a delivery failure.
            throw;
        }
        catch (Exception transient) when (SmtpErrorClassifier.IsTransient(transient, cancellationToken))
        {
            // The classifier considers this failure retryable.
            var scrubbed = CredentialRedactor.Scrub(transient.Message, smtpConfig.Credentials);
            _logger.LogWarning(
                "Transient SMTP failure delivering email to list '{List}' ({ExceptionType}): {Detail}",
                notification.ListName, transient.GetType().Name, scrubbed);
            return DeliveryOutcome.Transient(scrubbed);
        }
        catch (Exception unclassified)
        {
            // Anything the classifier does not recognise as transient is treated as
            // permanent, so a broken credential is not retried indefinitely.
            var scrubbed = CredentialRedactor.Scrub(unclassified.Message, smtpConfig.Credentials);
            _logger.LogError(
                "Unclassified failure delivering email to list '{List}' ({ExceptionType}): {Detail}",
                notification.ListName, unclassified.GetType().Name, scrubbed);
            return DeliveryOutcome.Permanent($"Email delivery failed: {scrubbed}");
        }
    }

    /// <summary>
    /// Runs the connect / authenticate / send sequence against a freshly created SMTP
    /// client and always attempts a disconnect afterwards. A failure the classifier marks
    /// as permanent is rethrown as <see cref="SmtpPermanentException"/>; every other
    /// exception propagates unchanged for the caller to classify.
    /// </summary>
    private async Task SendAsync(
        SmtpConfiguration config,
        SmtpTlsMode tlsMode,
        IReadOnlyList<string> bccAddresses,
        string subject,
        string body,
        CancellationToken cancellationToken)
    {
        // One client per send; it is disposed on exit when the wrapper is disposable.
        var client = _smtpClientFactory();
        using var clientLifetime = client as IDisposable;

        try
        {
            // A non-positive stored timeout falls back to the configured option value.
            var timeoutSeconds = config.ConnectionTimeoutSeconds > 0
                ? config.ConnectionTimeoutSeconds
                : _options.ConnectionTimeoutSeconds;

            await client.ConnectAsync(
                config.Host, config.Port, tlsMode, timeoutSeconds, cancellationToken);

            // For OAuth2 the stored credentials are exchanged for a token first.
            var effectiveCredentials = config.Credentials;
            if (config.AuthType.Equals("oauth2", StringComparison.OrdinalIgnoreCase)
                && _tokenService != null && effectiveCredentials != null)
            {
                effectiveCredentials = await _tokenService.GetTokenAsync(
                    effectiveCredentials, cancellationToken);
            }

            await client.AuthenticateAsync(config.AuthType, effectiveCredentials, cancellationToken);
            await client.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Deliberate cancellation must surface as cancellation, not as an SMTP failure.
            throw;
        }
        catch (Exception failure) when (SmtpErrorClassifier.Classify(failure, cancellationToken) == SmtpErrorClass.Permanent
                                        && failure is not SmtpPermanentException)
        {
            // Wrap an untyped permanent failure so the caller's typed catch sees it.
            throw new SmtpPermanentException(failure.Message, failure);
        }
        finally
        {
            // Best-effort teardown: a disconnect failure is logged and swallowed so it
            // cannot replace the exception raised by the send itself.
            try
            {
                await client.DisconnectAsync(cancellationToken);
            }
            catch (Exception teardownFailure)
            {
                _logger.LogDebug(
                    "Ignoring SMTP disconnect failure during cleanup: {Reason}", teardownFailure.Message);
            }
        }
    }
}
|
||||
@@ -0,0 +1,23 @@
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.NotificationOutbox.Delivery;
|
||||
|
||||
/// <summary>
/// Delivery strategy for one notification channel. An implementation declares the single
/// <see cref="NotificationType"/> it serves through <see cref="Type"/>, which lets a
/// caller pick the adapter that matches a notification's type.
/// </summary>
public interface INotificationDeliveryAdapter
{
    /// <summary>Channel served by this adapter.</summary>
    NotificationType Type { get; }

    /// <summary>
    /// Makes one delivery attempt for <paramref name="notification"/> and classifies the result.
    /// </summary>
    /// <param name="notification">Notification to send.</param>
    /// <param name="cancellationToken">Cancels the attempt.</param>
    /// <returns>The classified outcome of the attempt.</returns>
    Task<DeliveryOutcome> DeliverAsync(Notification notification, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,81 @@
|
||||
using Akka.Actor;
|
||||
|
||||
namespace ScadaLink.NotificationOutbox.Messages;
|
||||
|
||||
/// <summary>
/// Messages the <see cref="NotificationOutboxActor"/> sends to itself. They carry the
/// result of asynchronous repository/delivery work, or a timer tick, back onto the
/// actor's mailbox so the follow-up runs inside the actor's message handling.
/// </summary>
internal static class InternalMessages
{
    /// <summary>
    /// Outcome of an asynchronous ingest persistence attempt, piped back to the actor.
    /// The original <paramref name="Sender"/> travels with it so the ack can be sent to
    /// the submitter after the insert has finished.
    /// </summary>
    /// <param name="NotificationId">Id of the submitted notification.</param>
    /// <param name="Sender">The submitter that should receive the ack.</param>
    /// <param name="Succeeded">
    /// True when persistence finished without throwing, whether the row was newly inserted
    /// or already present; false when the persistence task faulted.
    /// </param>
    /// <param name="Error">Failure text when <paramref name="Succeeded"/> is false; null otherwise.</param>
    internal sealed record IngestPersisted(
        string NotificationId,
        IActorRef Sender,
        bool Succeeded,
        string? Error);

    /// <summary>
    /// Timer message that asks the actor to start a dispatch sweep. Stateless, so a single
    /// shared instance is scheduled.
    /// </summary>
    internal sealed class DispatchTick
    {
        /// <summary>The one shared tick instance handed to the dispatch timer.</summary>
        internal static readonly DispatchTick Instance = new DispatchTick();

        private DispatchTick() { }
    }

    /// <summary>
    /// Signals that an asynchronous dispatch sweep has ended, so the actor can clear its
    /// in-flight guard from within its own message handling.
    /// </summary>
    internal sealed class DispatchComplete
    {
        /// <summary>The one shared completion instance.</summary>
        internal static readonly DispatchComplete Instance = new DispatchComplete();

        private DispatchComplete() { }
    }

    /// <summary>
    /// Timer message that asks the actor to start a purge sweep of terminal notification
    /// rows. Stateless, so a single shared instance is scheduled.
    /// </summary>
    internal sealed class PurgeTick
    {
        /// <summary>The one shared tick instance handed to the purge timer.</summary>
        internal static readonly PurgeTick Instance = new PurgeTick();

        private PurgeTick() { }
    }

    /// <summary>
    /// Signals that an asynchronous purge sweep has ended. It carries no payload.
    /// </summary>
    internal sealed class PurgeComplete
    {
        /// <summary>The one shared completion instance.</summary>
        internal static readonly PurgeComplete Instance = new PurgeComplete();

        private PurgeComplete() { }
    }
}
|
||||
@@ -0,0 +1,687 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
using ScadaLink.NotificationOutbox.Delivery;
|
||||
using ScadaLink.NotificationOutbox.Messages;
|
||||
|
||||
namespace ScadaLink.NotificationOutbox;
|
||||
|
||||
/// <summary>
|
||||
/// Central-side actor that owns the notification outbox. It accepts
|
||||
/// <see cref="NotificationSubmit"/> messages forwarded from sites and persists each as a
|
||||
/// <see cref="Notification"/> row (the ingest path), and runs a periodic dispatch loop
|
||||
/// that claims due notifications, delivers them through the matching channel adapter, and
|
||||
/// applies the resulting status transition. It also runs a periodic purge that bulk-deletes
|
||||
/// terminal notification rows once they age past the configured retention window.
|
||||
/// </summary>
|
||||
public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
private const string DispatchTimerKey = "dispatch";
|
||||
private const string PurgeTimerKey = "purge";
|
||||
|
||||
/// <summary>Retry policy fallback used when no SMTP configuration row is present.</summary>
|
||||
private const int FallbackMaxRetries = 10;
|
||||
private static readonly TimeSpan FallbackRetryDelay = TimeSpan.FromMinutes(1);
|
||||
|
||||
private readonly IServiceProvider _serviceProvider;
|
||||
private readonly NotificationOutboxOptions _options;
|
||||
private readonly ILogger<NotificationOutboxActor> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// In-flight guard for the dispatch loop. Set true at the start of a sweep and cleared
|
||||
/// when the sweep's <see cref="InternalMessages.DispatchComplete"/> arrives. While true,
|
||||
/// further <see cref="InternalMessages.DispatchTick"/>s are dropped so sweeps never overlap.
|
||||
/// </summary>
|
||||
private bool _dispatching;
|
||||
|
||||
/// <summary>Akka timer scheduler, assigned by the actor system via <see cref="IWithTimers"/>.</summary>
|
||||
public ITimerScheduler Timers { get; set; } = null!;
|
||||
|
||||
/// <summary>
/// Stores the actor's dependencies and registers one handler per message type.
/// </summary>
/// <param name="serviceProvider">Root provider from which per-operation scopes are created.</param>
/// <param name="options">Outbox options, including the dispatch and purge timer intervals.</param>
/// <param name="logger">Logger for ingest, dispatch and purge activity.</param>
public NotificationOutboxActor(
    IServiceProvider serviceProvider,
    NotificationOutboxOptions options,
    ILogger<NotificationOutboxActor> logger)
{
    _serviceProvider = serviceProvider;
    _options = options;
    _logger = logger;

    Receive<NotificationSubmit>(HandleSubmit);
    Receive<InternalMessages.IngestPersisted>(HandleIngestPersisted);
    Receive<InternalMessages.DispatchTick>(_ => HandleDispatchTick());

    // Statement-bodied on purpose. The expression form `_ => _dispatching = false`
    // yields a bool, so overload resolution would choose Receive<T>(Func<T, bool>) and
    // the handler would return false, which Akka interprets as "message not handled".
    // The block body binds to the Action<T> overload, so the message counts as handled.
    Receive<InternalMessages.DispatchComplete>(_ => { _dispatching = false; });

    Receive<InternalMessages.PurgeTick>(_ => HandlePurgeTick());

    // No-op: there is no purge guard to lower, so PurgeComplete has nothing to act on.
    // Registering it keeps the message from being reported as unhandled.
    Receive<InternalMessages.PurgeComplete>(_ => { });

    Receive<NotificationOutboxQueryRequest>(HandleQuery);
    Receive<NotificationStatusQuery>(HandleStatusQuery);
    Receive<RetryNotificationRequest>(HandleRetry);
    Receive<DiscardNotificationRequest>(HandleDiscard);
    Receive<NotificationKpiRequest>(HandleKpiRequest);
    Receive<PerSiteNotificationKpiRequest>(HandlePerSiteKpiRequest);
}
|
||||
|
||||
/// <summary>
/// Schedules the two periodic timers when the actor starts: dispatch ticks every
/// <see cref="NotificationOutboxOptions.DispatchInterval"/> and purge ticks every
/// <see cref="NotificationOutboxOptions.PurgeInterval"/>.
/// </summary>
protected override void PreStart()
{
    base.PreStart();

    var dispatchEvery = _options.DispatchInterval;
    Timers.StartPeriodicTimer(DispatchTimerKey, InternalMessages.DispatchTick.Instance, dispatchEvery);

    var purgeEvery = _options.PurgeInterval;
    Timers.StartPeriodicTimer(PurgeTimerKey, InternalMessages.PurgeTick.Instance, purgeEvery);
}
|
||||
|
||||
/// <summary>
/// Converts an incoming <see cref="NotificationSubmit"/> into a <see cref="Notification"/>,
/// starts persisting it, and pipes the result back to <see cref="Self"/> together with
/// the original sender so the ack is produced by the actor's own handler.
/// </summary>
private void HandleSubmit(NotificationSubmit msg)
{
    // Capture the sender now; the piped continuation runs after this handler returns.
    var replyTo = Sender;
    var entity = BuildNotification(msg);

    // A completed task maps to Succeeded: true; a faulted task maps to Succeeded: false
    // with the base exception's message as the error text.
    var persistence = PersistAsync(entity);
    persistence.PipeTo(
        Self,
        success: () => new InternalMessages.IngestPersisted(
            msg.NotificationId, replyTo, Succeeded: true, Error: null),
        failure: fault => new InternalMessages.IngestPersisted(
            msg.NotificationId, replyTo, Succeeded: false, Error: fault.GetBaseException().Message));
}
|
||||
|
||||
/// <summary>
/// Resolves a scoped <see cref="INotificationOutboxRepository"/> and calls
/// <c>InsertIfNotExistsAsync</c> for the notification. The call's result is deliberately
/// not inspected: the task simply completes, so the caller acks the submission whenever
/// no exception was thrown.
/// </summary>
/// <param name="notification">The notification row to persist.</param>
/// <returns>A task that completes when the insert call finishes, or faults if it threw.</returns>
private async Task PersistAsync(Notification notification)
{
    // An async scope disposes scoped services through DisposeAsync, so this async method
    // does not block on a synchronous Dispose and services that implement only
    // IAsyncDisposable are disposed correctly.
    await using var scope = _serviceProvider.CreateAsyncScope();
    var repository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    await repository.InsertIfNotExistsAsync(notification);
}
|
||||
|
||||
/// <summary>
/// Replies to the original submitter with a <see cref="NotificationSubmitAck"/> after the
/// persist attempt has finished. A successful persist is acked with <c>Accepted: true</c>;
/// a failed one is logged as a warning and acked with <c>Accepted: false</c> plus the
/// error text carried on the message.
/// </summary>
/// <param name="msg">The persistence outcome piped back by <see cref="HandleSubmit"/>.</param>
private void HandleIngestPersisted(InternalMessages.IngestPersisted msg)
{
    if (!msg.Succeeded)
    {
        _logger.LogWarning(
            "Failed to ingest notification {NotificationId}: {Error}",
            msg.NotificationId, msg.Error);

        var rejection = new NotificationSubmitAck(msg.NotificationId, Accepted: false, Error: msg.Error);
        msg.Sender.Tell(rejection);
        return;
    }

    _logger.LogDebug("Notification {NotificationId} ingested into outbox.", msg.NotificationId);

    var acceptance = new NotificationSubmitAck(msg.NotificationId, Accepted: true, Error: null);
    msg.Sender.Tell(acceptance);
}
|
||||
|
||||
/// <summary>
/// Reacts to a dispatch tick. While a previous sweep is still running the tick is ignored,
/// so two sweeps never run concurrently. Otherwise the in-flight flag is set, a sweep is
/// started, and an <see cref="InternalMessages.DispatchComplete"/> is piped back to
/// <see cref="Self"/> when the sweep task finishes — whether it completed or faulted.
/// </summary>
private void HandleDispatchTick()
{
    if (!_dispatching)
    {
        _dispatching = true;
        var sweepTime = DateTimeOffset.UtcNow;

        // RunDispatchPass catches its own exceptions, so the failure projection is a
        // second line of defence: a faulted task must still produce DispatchComplete,
        // because nothing else pipes that message back after a sweep.
        RunDispatchPass(sweepTime).PipeTo(
            Self,
            success: () => InternalMessages.DispatchComplete.Instance,
            failure: fault =>
            {
                _logger.LogError(fault, "Dispatch sweep faulted unexpectedly.");
                return InternalMessages.DispatchComplete.Instance;
            });
    }
}
|
||||
|
||||
/// <summary>
/// Executes one dispatch sweep: fetches the due batch, resolves the retry policy, and
/// delivers the notifications one after another. A failure on an individual notification
/// is logged and does not stop the remaining ones. Everything runs inside an outer
/// try/catch, so the returned task completes normally even when scope creation, service
/// resolution, or retry-policy resolution throws.
///
/// The repositories and the delivery adapters are all resolved from a scope created for
/// this sweep and disposed when it ends; none of them is stored on the actor.
/// </summary>
/// <param name="now">The sweep timestamp, used for claiming and for every row update.</param>
private async Task RunDispatchPass(DateTimeOffset now)
{
    try
    {
        using var sweepScope = _serviceProvider.CreateScope();
        var services = sweepScope.ServiceProvider;
        var outbox = services.GetRequiredService<INotificationOutboxRepository>();
        var smtpSettingsSource = services.GetRequiredService<INotificationRepository>();
        var adaptersByType = ResolveAdapters(services);

        IReadOnlyList<Notification> batch;
        try
        {
            batch = await outbox.GetDueAsync(now, _options.DispatchBatchSize);
        }
        catch (Exception claimFault)
        {
            _logger.LogError(claimFault, "Dispatch sweep failed to claim due notifications.");
            return;
        }

        if (batch.Count > 0)
        {
            // Resolved once per sweep and only when there is something to deliver.
            var (maxRetries, retryDelay) = await ResolveRetryPolicyAsync(smtpSettingsSource);

            foreach (var item in batch)
            {
                try
                {
                    await DeliverOneAsync(item, now, maxRetries, retryDelay, outbox, adaptersByType);
                }
                catch (Exception deliveryFault)
                {
                    // Contain the failure to this notification; keep processing the batch.
                    _logger.LogError(
                        deliveryFault, "Dispatch failed for notification {NotificationId}.", item.NotificationId);
                }
            }
        }
    }
    catch (Exception sweepFault)
    {
        // Anything not handled above (scope, service or retry-policy resolution) ends
        // here so that the task never faults.
        _logger.LogError(sweepFault, "Dispatch sweep failed unexpectedly.");
    }
}
|
||||
|
||||
/// <summary>
/// Reads the retry policy (maximum retries and retry delay) from the first SMTP
/// configuration returned by the repository. If there is no configuration, or the first
/// entry is null, the fallback constants are returned instead.
/// </summary>
/// <param name="notificationRepository">Repository used to load the SMTP configurations.</param>
/// <returns>The maximum retry count and the delay between retries.</returns>
private async Task<(int MaxRetries, TimeSpan RetryDelay)> ResolveRetryPolicyAsync(
    INotificationRepository notificationRepository)
{
    var smtpConfigurations = await notificationRepository.GetAllSmtpConfigurationsAsync();

    if (smtpConfigurations.Count > 0 && smtpConfigurations[0] is { } primary)
    {
        return (primary.MaxRetries, primary.RetryDelay);
    }

    return (FallbackMaxRetries, FallbackRetryDelay);
}
|
||||
|
||||
/// <summary>
/// Collects every <see cref="INotificationDeliveryAdapter"/> registered in the supplied
/// scope into a lookup keyed by <see cref="NotificationType"/>. When several adapters
/// report the same type, the one enumerated last is kept.
/// </summary>
/// <param name="scopedServices">The per-sweep service provider to resolve adapters from.</param>
/// <returns>A type-to-adapter lookup for the current sweep.</returns>
private static IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter> ResolveAdapters(
    IServiceProvider scopedServices)
{
    // GroupBy keeps the source order inside each group, so Last() is the adapter that
    // was enumerated last for that type.
    return scopedServices
        .GetServices<INotificationDeliveryAdapter>()
        .GroupBy(candidate => candidate.Type)
        .ToDictionary(sameType => sameType.Key, sameType => sameType.Last());
}
|
||||
|
||||
/// <summary>
/// Delivers one notification through the adapter registered for its type and records the
/// resulting status transition. The updated row is persisted on every path:
/// <list type="bullet">
/// <item>no adapter for the type — parked with an explanatory error, delivery not attempted;</item>
/// <item><c>Success</c> — marked delivered, resolved targets stored, last error cleared;</item>
/// <item><c>TransientFailure</c> — retry count incremented; parked once it reaches
/// <paramref name="maxRetries"/>, otherwise set to retrying with the next attempt
/// scheduled <paramref name="retryDelay"/> after <paramref name="now"/>;</item>
/// <item><c>PermanentFailure</c> — parked with the adapter's error;</item>
/// <item>any other result — parked with an error naming the unrecognised result.</item>
/// </list>
/// </summary>
/// <param name="notification">The row to deliver; mutated in place and then persisted.</param>
/// <param name="now">Sweep timestamp written to the attempt/delivery time fields.</param>
/// <param name="maxRetries">Retry count at which a transient failure parks the row.</param>
/// <param name="retryDelay">Delay added to <paramref name="now"/> for the next attempt.</param>
/// <param name="outboxRepository">Repository used to persist the updated row.</param>
/// <param name="adapters">Per-sweep lookup of delivery adapters by notification type.</param>
private async Task DeliverOneAsync(
    Notification notification,
    DateTimeOffset now,
    int maxRetries,
    TimeSpan retryDelay,
    INotificationOutboxRepository outboxRepository,
    IReadOnlyDictionary<NotificationType, INotificationDeliveryAdapter> adapters)
{
    if (!adapters.TryGetValue(notification.Type, out var adapter))
    {
        notification.Status = NotificationStatus.Parked;
        notification.LastError = $"no delivery adapter for type {notification.Type}";
        notification.LastAttemptAt = now;
        await outboxRepository.UpdateAsync(notification);
        return;
    }

    var outcome = await adapter.DeliverAsync(notification);

    switch (outcome.Result)
    {
        case DeliveryResult.Success:
            notification.Status = NotificationStatus.Delivered;
            notification.DeliveredAt = now;
            notification.LastAttemptAt = now;
            notification.ResolvedTargets = outcome.ResolvedTargets;
            notification.LastError = null;
            break;

        case DeliveryResult.TransientFailure:
            notification.LastAttemptAt = now;
            notification.RetryCount++;
            notification.LastError = outcome.Error;
            if (notification.RetryCount >= maxRetries)
            {
                notification.Status = NotificationStatus.Parked;
            }
            else
            {
                notification.Status = NotificationStatus.Retrying;
                notification.NextAttemptAt = now + retryDelay;
            }

            break;

        case DeliveryResult.PermanentFailure:
            notification.Status = NotificationStatus.Parked;
            notification.LastAttemptAt = now;
            notification.LastError = outcome.Error;
            break;

        default:
            // Without this branch an unrecognised result would persist the row completely
            // unchanged — no status, error, or attempt timestamp — presumably leaving it
            // due again on the next sweep (TODO confirm against GetDueAsync). Park it so
            // the attempt is recorded and the row is visible for manual retry/discard.
            notification.Status = NotificationStatus.Parked;
            notification.LastAttemptAt = now;
            notification.LastError = $"unrecognised delivery result {outcome.Result}";
            break;
    }

    await outboxRepository.UpdateAsync(notification);
}
|
||||
|
||||
/// <summary>
/// Reacts to a purge tick by starting an asynchronous purge of terminal rows older than
/// the current time minus <see cref="NotificationOutboxOptions.TerminalRetention"/>. No
/// in-flight flag is used here. <see cref="RunPurgePass"/> catches its own exceptions, so
/// the success projection — which logs the deleted count — is the expected completion
/// path; the failure projection exists only as a backup. Both pipe
/// <see cref="InternalMessages.PurgeComplete"/> to <see cref="Self"/>.
/// </summary>
private void HandlePurgeTick()
{
    var cutoff = DateTimeOffset.UtcNow - _options.TerminalRetention;
    var purgeTask = RunPurgePass(cutoff);

    purgeTask.PipeTo(
        Self,
        success: removedCount =>
        {
            _logger.LogInformation(
                "Purge removed {DeletedCount} terminal notification(s) older than {Cutoff:o}.",
                removedCount, cutoff);
            return InternalMessages.PurgeComplete.Instance;
        },
        failure: fault =>
        {
            _logger.LogError(fault, "Purge sweep faulted unexpectedly.");
            return InternalMessages.PurgeComplete.Instance;
        });
}
|
||||
|
||||
/// <summary>
/// Executes one purge sweep: resolves a scoped <see cref="INotificationOutboxRepository"/>
/// and asks it to delete terminal rows older than <paramref name="cutoff"/>. The whole
/// body is guarded by a try/catch, so the returned task never faults; any exception from
/// scope creation, service resolution, or the delete itself is logged and reported as a
/// deleted count of 0.
/// </summary>
/// <param name="cutoff">Rows older than this instant are eligible for deletion.</param>
/// <returns>The number of rows deleted, or 0 when the sweep failed.</returns>
private async Task<int> RunPurgePass(DateTimeOffset cutoff)
{
    try
    {
        using var purgeScope = _serviceProvider.CreateScope();
        var outbox = purgeScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();

        var removedCount = await outbox.DeleteTerminalOlderThanAsync(cutoff);
        return removedCount;
    }
    catch (Exception purgeFault)
    {
        // Logged and converted to "nothing deleted" so the caller always sees a
        // normally-completed task.
        _logger.LogError(purgeFault, "Purge sweep failed unexpectedly.");
        return 0;
    }
}
|
||||
|
||||
/// <summary>
/// Serves a paginated, filtered outbox query. The query runs asynchronously and its
/// response is piped straight to the requester captured here. If the query task faults,
/// the requester instead receives a failure response carrying the base exception message,
/// an empty notification list, and a total count of 0.
/// </summary>
/// <param name="request">The query request, including filters and paging.</param>
private void HandleQuery(NotificationOutboxQueryRequest request)
{
    var requester = Sender;
    var queriedAt = DateTimeOffset.UtcNow;
    var queryTask = QueryOutboxAsync(request, queriedAt);

    queryTask.PipeTo(
        requester,
        success: result => result,
        failure: fault => new NotificationOutboxQueryResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            Notifications: Array.Empty<NotificationSummary>(),
            TotalCount: 0));
}
|
||||
|
||||
/// <summary>
/// Runs the outbox query for <paramref name="request"/>. The string status/type filters
/// are parsed to their enums, the stuck cutoff is included in the filter only when
/// <c>StuckOnly</c> is set, and each returned row is mapped to a
/// <see cref="NotificationSummary"/> whose <c>IsStuck</c> flag is evaluated against the
/// stuck cutoff derived from <paramref name="now"/>.
/// </summary>
/// <param name="request">The query request, including filters and paging.</param>
/// <param name="now">Reference time used to derive the stuck cutoff.</param>
/// <returns>A success response with the mapped page and the total row count.</returns>
private async Task<NotificationOutboxQueryResponse> QueryOutboxAsync(
    NotificationOutboxQueryRequest request, DateTimeOffset now)
{
    var statusFilter = ParseEnum<NotificationStatus>(request.StatusFilter);
    var typeFilter = ParseEnum<NotificationType>(request.TypeFilter);

    var filter = new NotificationOutboxFilter(
        Status: statusFilter,
        Type: typeFilter,
        SourceSiteId: request.SourceSiteFilter,
        ListName: request.ListNameFilter,
        SubjectKeyword: request.SubjectKeyword,
        StuckOnly: request.StuckOnly,
        StuckCutoff: request.StuckOnly ? StuckCutoff(now) : null,
        From: request.From,
        To: request.To);

    using var queryScope = _serviceProvider.CreateScope();
    var outbox = queryScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var (page, totalCount) = await outbox.QueryAsync(filter, request.PageNumber, request.PageSize);

    // The stuck flag is reported for every row, regardless of the StuckOnly filter.
    var stuckCutoff = StuckCutoff(now);
    var summaries = new List<NotificationSummary>();
    foreach (var row in page)
    {
        summaries.Add(new NotificationSummary(
            row.NotificationId,
            row.Type.ToString(),
            row.ListName,
            row.Subject,
            row.Status.ToString(),
            row.RetryCount,
            row.LastError,
            row.SourceSiteId,
            row.SourceInstanceId,
            row.CreatedAt,
            row.DeliveredAt,
            IsStuck: IsStuck(row, stuckCutoff)));
    }

    return new NotificationOutboxQueryResponse(
        request.CorrelationId, Success: true, ErrorMessage: null, summaries, totalCount);
}
|
||||
|
||||
/// <summary>
/// Serves a status query for a single notification and pipes the response to the
/// requester captured here. When the lookup task faults, the fault is logged as a warning
/// and the requester receives a <c>Found: false</c> response with empty detail.
/// </summary>
/// <param name="query">The status query identifying the notification.</param>
private void HandleStatusQuery(NotificationStatusQuery query)
{
    var requester = Sender;
    var lookupTask = StatusQueryAsync(query);

    lookupTask.PipeTo(
        requester,
        success: result => result,
        failure: fault =>
        {
            // The response type carries no error field, so a fault can only be reported
            // as Found: false. Logging it here is what distinguishes a failed lookup
            // from a notification that really does not exist.
            _logger.LogWarning(
                fault, "Status query for notification {NotificationId} failed.", query.NotificationId);

            return new NotificationStatusResponse(
                query.CorrelationId, Found: false, Status: string.Empty,
                RetryCount: 0, LastError: null, DeliveredAt: null);
        });
}
|
||||
|
||||
/// <summary>
/// Looks up one notification by id through a scoped
/// <see cref="INotificationOutboxRepository"/>. Returns <c>Found: false</c> with empty
/// detail when no row exists; otherwise returns the row's status, retry count, last error,
/// and delivery time.
/// </summary>
/// <param name="query">The status query identifying the notification.</param>
/// <returns>The status response for the requested notification.</returns>
private async Task<NotificationStatusResponse> StatusQueryAsync(NotificationStatusQuery query)
{
    using var lookupScope = _serviceProvider.CreateScope();
    var outbox = lookupScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var row = await outbox.GetByIdAsync(query.NotificationId);

    return row is null
        ? new NotificationStatusResponse(
            query.CorrelationId, Found: false, Status: string.Empty,
            RetryCount: 0, LastError: null, DeliveredAt: null)
        : new NotificationStatusResponse(
            query.CorrelationId,
            Found: true,
            Status: row.Status.ToString(),
            RetryCount: row.RetryCount,
            LastError: row.LastError,
            DeliveredAt: row.DeliveredAt);
}
|
||||
|
||||
/// <summary>
/// Serves a manual retry request and pipes the result to the requester captured here.
/// The actual state change is performed by <see cref="RetryAsync"/>; if that task faults,
/// the requester receives a failure response carrying the base exception message.
/// </summary>
/// <param name="request">The retry request identifying the notification.</param>
private void HandleRetry(RetryNotificationRequest request)
{
    var requester = Sender;
    var retryTask = RetryAsync(request);

    retryTask.PipeTo(
        requester,
        success: result => result,
        failure: fault => new RetryNotificationResponse(
            request.CorrelationId, Success: false, ErrorMessage: fault.GetBaseException().Message));
}
|
||||
|
||||
/// <summary>
/// Re-queues a parked notification. The request is rejected when the notification does
/// not exist or is not in the <c>Parked</c> status. Otherwise the row is set back to
/// <c>Pending</c> with its retry count reset to 0 and its next-attempt time and last
/// error cleared, and the change is persisted.
/// </summary>
/// <param name="request">The retry request identifying the notification.</param>
/// <returns>A success response, or a failure response explaining the rejection.</returns>
private async Task<RetryNotificationResponse> RetryAsync(RetryNotificationRequest request)
{
    RetryNotificationResponse Rejected(string reason) =>
        new RetryNotificationResponse(request.CorrelationId, Success: false, ErrorMessage: reason);

    using var retryScope = _serviceProvider.CreateScope();
    var outbox = retryScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var row = await outbox.GetByIdAsync(request.NotificationId);

    if (row is null)
    {
        return Rejected("notification not found");
    }

    if (row.Status != NotificationStatus.Parked)
    {
        return Rejected("only parked notifications can be retried");
    }

    // Reset everything the dispatch transitions wrote, so the row looks freshly queued.
    row.Status = NotificationStatus.Pending;
    row.RetryCount = 0;
    row.NextAttemptAt = null;
    row.LastError = null;
    await outbox.UpdateAsync(row);

    return new RetryNotificationResponse(request.CorrelationId, Success: true, ErrorMessage: null);
}
|
||||
|
||||
/// <summary>
/// Serves a manual discard request and pipes the result to the requester captured here.
/// The actual state change is performed by <see cref="DiscardAsync"/>; if that task
/// faults, the requester receives a failure response carrying the base exception message.
/// </summary>
/// <param name="request">The discard request identifying the notification.</param>
private void HandleDiscard(DiscardNotificationRequest request)
{
    var requester = Sender;
    var discardTask = DiscardAsync(request);

    discardTask.PipeTo(
        requester,
        success: result => result,
        failure: fault => new DiscardNotificationResponse(
            request.CorrelationId, Success: false, ErrorMessage: fault.GetBaseException().Message));
}
|
||||
|
||||
/// <summary>
/// Discards a parked notification. The request is rejected when the notification does not
/// exist or is not in the <c>Parked</c> status. Otherwise the row's status is set to
/// <c>Discarded</c> and the change is persisted.
/// </summary>
/// <param name="request">The discard request identifying the notification.</param>
/// <returns>A success response, or a failure response explaining the rejection.</returns>
private async Task<DiscardNotificationResponse> DiscardAsync(DiscardNotificationRequest request)
{
    DiscardNotificationResponse Rejected(string reason) =>
        new DiscardNotificationResponse(request.CorrelationId, Success: false, ErrorMessage: reason);

    using var discardScope = _serviceProvider.CreateScope();
    var outbox = discardScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var row = await outbox.GetByIdAsync(request.NotificationId);

    if (row is null)
    {
        return Rejected("notification not found");
    }

    if (row.Status != NotificationStatus.Parked)
    {
        return Rejected("only parked notifications can be discarded");
    }

    row.Status = NotificationStatus.Discarded;
    await outbox.UpdateAsync(row);

    return new DiscardNotificationResponse(request.CorrelationId, Success: true, ErrorMessage: null);
}
|
||||
|
||||
/// <summary>
/// Serves a KPI snapshot request. The stuck cutoff is the current time minus
/// <see cref="NotificationOutboxOptions.StuckAgeThreshold"/> and the delivered window
/// starts at the current time minus <see cref="NotificationOutboxOptions.DeliveredKpiWindow"/>.
/// The response is piped to the requester captured here; if the computation faults, the
/// requester receives a failure response with all metrics zeroed.
/// </summary>
/// <param name="request">The KPI request.</param>
private void HandleKpiRequest(NotificationKpiRequest request)
{
    var requester = Sender;
    var requestedAt = DateTimeOffset.UtcNow;
    var stuckCutoff = StuckCutoff(requestedAt);
    var deliveredSince = requestedAt - _options.DeliveredKpiWindow;

    var kpiTask = ComputeKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince);

    kpiTask.PipeTo(
        requester,
        success: result => result,
        failure: fault => new NotificationKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            QueueDepth: 0,
            StuckCount: 0,
            ParkedCount: 0,
            DeliveredLastInterval: 0,
            OldestPendingAge: null));
}
|
||||
|
||||
/// <summary>
/// Computes the outbox KPI snapshot through a scoped
/// <see cref="INotificationOutboxRepository"/> and wraps it in a success response.
/// </summary>
/// <param name="correlationId">Correlation id echoed back on the response.</param>
/// <param name="stuckCutoff">Cutoff instant passed to the repository for the stuck count.</param>
/// <param name="deliveredSince">Start of the window passed to the repository for the delivered count.</param>
/// <returns>A success response carrying the snapshot's metrics.</returns>
private async Task<NotificationKpiResponse> ComputeKpisAsync(
    string correlationId, DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince)
{
    using var kpiScope = _serviceProvider.CreateScope();
    var outbox = kpiScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var kpis = await outbox.ComputeKpisAsync(stuckCutoff, deliveredSince);

    var response = new NotificationKpiResponse(
        correlationId,
        Success: true,
        ErrorMessage: null,
        kpis.QueueDepth,
        kpis.StuckCount,
        kpis.ParkedCount,
        kpis.DeliveredLastInterval,
        kpis.OldestPendingAge);

    return response;
}
|
||||
|
||||
/// <summary>
/// Serves a per-site KPI request, using the same stuck cutoff and delivered window
/// derivation as <see cref="HandleKpiRequest"/>. The response is piped to the requester
/// captured here; if the computation faults, the requester receives a failure response
/// with an empty site list.
/// </summary>
/// <param name="request">The per-site KPI request.</param>
private void HandlePerSiteKpiRequest(PerSiteNotificationKpiRequest request)
{
    var requester = Sender;
    var requestedAt = DateTimeOffset.UtcNow;
    var stuckCutoff = StuckCutoff(requestedAt);
    var deliveredSince = requestedAt - _options.DeliveredKpiWindow;

    var kpiTask = ComputePerSiteKpisAsync(request.CorrelationId, stuckCutoff, deliveredSince);

    kpiTask.PipeTo(
        requester,
        success: result => result,
        failure: fault => new PerSiteNotificationKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            Sites: Array.Empty<SiteNotificationKpiSnapshot>()));
}
|
||||
|
||||
/// <summary>
/// Computes the per-source-site KPI snapshots through a scoped
/// <see cref="INotificationOutboxRepository"/> and wraps them in a success response.
/// </summary>
/// <param name="correlationId">Correlation id echoed back on the response.</param>
/// <param name="stuckCutoff">Cutoff instant passed to the repository for the stuck counts.</param>
/// <param name="deliveredSince">Start of the window passed to the repository for the delivered counts.</param>
/// <returns>A success response carrying one snapshot per site.</returns>
private async Task<PerSiteNotificationKpiResponse> ComputePerSiteKpisAsync(
    string correlationId, DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince)
{
    using var kpiScope = _serviceProvider.CreateScope();
    var outbox = kpiScope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var perSite = await outbox.ComputePerSiteKpisAsync(stuckCutoff, deliveredSince);

    var response = new PerSiteNotificationKpiResponse(
        correlationId, Success: true, ErrorMessage: null, perSite);

    return response;
}
|
||||
|
||||
/// <summary>
/// Computes the stuck cutoff: <paramref name="now"/> moved back by
/// <see cref="NotificationOutboxOptions.StuckAgeThreshold"/>. Non-terminal notifications
/// created before this instant are treated as stuck.
/// </summary>
/// <param name="now">The reference time.</param>
/// <returns>The instant before which a non-terminal notification counts as stuck.</returns>
private DateTimeOffset StuckCutoff(DateTimeOffset now)
{
    var threshold = _options.StuckAgeThreshold;
    return now - threshold;
}
|
||||
|
||||
/// <summary>
/// Decides whether a notification is stuck: its status must be <c>Pending</c> or
/// <c>Retrying</c> and it must have been created strictly before
/// <paramref name="stuckCutoff"/>. Any other status is never stuck.
/// </summary>
/// <param name="notification">The notification to evaluate.</param>
/// <param name="stuckCutoff">The cutoff instant the creation time is compared with.</param>
/// <returns><c>true</c> when the notification counts as stuck.</returns>
private static bool IsStuck(Notification notification, DateTimeOffset stuckCutoff)
{
    return notification.Status switch
    {
        NotificationStatus.Pending or NotificationStatus.Retrying => notification.CreatedAt < stuckCutoff,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Parses a string filter value to a nullable enum, ignoring case. A null, empty,
/// whitespace, or unrecognised value yields <c>null</c> — meaning "no constraint on that
/// dimension". Numeric text is accepted only when it maps to a declared member of
/// <typeparamref name="TEnum"/>.
/// </summary>
/// <typeparam name="TEnum">The enum type to parse into.</typeparam>
/// <param name="value">The raw filter text; may be null.</param>
/// <returns>The parsed enum member, or <c>null</c> when the text names no declared member.</returns>
private static TEnum? ParseEnum<TEnum>(string? value) where TEnum : struct, Enum
{
    // Enum.TryParse succeeds for any integer text (e.g. "999") and returns a value that
    // is not a declared member. Without the IsDefined check such input would become a
    // real filter on a non-existent value instead of "no constraint".
    if (Enum.TryParse<TEnum>(value, ignoreCase: true, out var parsed) && Enum.IsDefined(parsed))
    {
        return parsed;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Maps a <see cref="NotificationSubmit"/> to a new <see cref="Notification"/> row. The
/// type is always <see cref="NotificationType.Email"/>, the source fields are copied from
/// the message, and <c>CreatedAt</c> is stamped with the current UTC time. The status is
/// not assigned here.
/// </summary>
/// <param name="msg">The inbound submission.</param>
/// <returns>The notification row to persist.</returns>
private static Notification BuildNotification(NotificationSubmit msg)
{
    // Email is hard-coded: per the original author's note, NotificationType currently
    // has no other member.
    var row = new Notification(
        msg.NotificationId,
        NotificationType.Email,
        msg.ListName,
        msg.Subject,
        msg.Body,
        msg.SourceSiteId)
    {
        SourceInstanceId = msg.SourceInstanceId,
        SourceScript = msg.SourceScript,
        SiteEnqueuedAt = msg.SiteEnqueuedAt,
        CreatedAt = DateTimeOffset.UtcNow,
        // Status is intentionally left at its default so the dispatch sweep can pick the row up.
    };

    return row;
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
namespace ScadaLink.NotificationOutbox;
|
||||
|
||||
/// <summary>
/// Settings for the Notification Outbox: how often dispatch and purge sweeps run, how many
/// notifications a dispatch sweep takes, when a notification is reported as stuck, how
/// long terminal rows are retained, and the window used for the delivered KPI.
/// </summary>
public class NotificationOutboxOptions
{
    /// <summary>Period of the dispatch timer; one dispatch sweep is triggered per period. Default: 10 seconds.</summary>
    public TimeSpan DispatchInterval { get; set; } = TimeSpan.FromSeconds(10);

    /// <summary>Upper bound on the number of due notifications requested per dispatch sweep. Default: 100.</summary>
    public int DispatchBatchSize { get; set; } = 100;

    /// <summary>
    /// Age used to derive the stuck cutoff: a notification that is still pending or
    /// retrying and was created longer ago than this is reported as stuck in queries and
    /// KPIs. Default: 10 minutes.
    /// </summary>
    public TimeSpan StuckAgeThreshold { get; set; } = TimeSpan.FromMinutes(10);

    /// <summary>How far back the purge cutoff lies; terminal rows older than this are purged. Default: 365 days.</summary>
    public TimeSpan TerminalRetention { get; set; } = TimeSpan.FromDays(365);

    /// <summary>Period of the purge timer; one purge sweep is triggered per period. Default: 1 day.</summary>
    public TimeSpan PurgeInterval { get; set; } = TimeSpan.FromDays(1);

    /// <summary>Length of the trailing window over which delivered notifications are counted for the KPI. Default: 1 minute.</summary>
    public TimeSpan DeliveredKpiWindow { get; set; } = TimeSpan.FromMinutes(1);
}
|
||||
@@ -0,0 +1,27 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Akka" />
|
||||
<PackageReference Include="Akka.Cluster.Tools" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||
<!-- Email delivery adapter reuses the NotificationService SMTP machinery
|
||||
(ISmtpClientWrapper, SmtpPermanentException, SmtpTlsModeParser,
|
||||
OAuth2TokenService). -->
|
||||
<ProjectReference Include="../ScadaLink.NotificationService/ScadaLink.NotificationService.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="ScadaLink.NotificationOutbox.Tests" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,49 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using ScadaLink.NotificationOutbox.Delivery;
|
||||
|
||||
namespace ScadaLink.NotificationOutbox;
|
||||
|
||||
/// <summary>
/// Dependency-injection wiring for the Notification Outbox: binds
/// <see cref="NotificationOutboxOptions"/> to configuration and registers the channel
/// delivery adapters.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>Name of the configuration section that <see cref="NotificationOutboxOptions"/> is bound to.</summary>
    public const string OptionsSection = "ScadaLink:NotificationOutbox";

    /// <summary>
    /// Adds the outbox-specific services: the <see cref="NotificationOutboxOptions"/>
    /// binding and the channel delivery adapters.
    ///
    /// Only outbox registrations are made here. <see cref="EmailNotificationDeliveryAdapter"/>
    /// depends on the <see cref="ScadaLink.NotificationService"/> SMTP machinery —
    /// <c>Func&lt;ISmtpClientWrapper&gt;</c>, <c>OAuth2TokenService</c> and
    /// <c>NotificationOptions</c> — which this method does not register, so the caller
    /// (the Host on the central node) must call <c>AddNotificationService()</c> as well.
    /// Registering them here too would duplicate them.
    ///
    /// <see cref="EmailNotificationDeliveryAdapter"/> is registered <em>scoped</em> because it
    /// takes a scoped <see cref="ScadaLink.Commons.Interfaces.Repositories.INotificationRepository"/>.
    /// <see cref="NotificationOutboxActor"/> resolves adapters from a new scope for each
    /// dispatch sweep instead of keeping them, so the singleton actor never holds a scoped
    /// adapter.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddNotificationOutbox(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var optionsBuilder = services.AddOptions<NotificationOutboxOptions>();
        optionsBuilder.BindConfiguration(OptionsSection);

        // The concrete registration lets callers and tests resolve the email adapter
        // directly; the interface registration forwards to that same scoped instance so
        // the dispatch sweep can enumerate every channel adapter.
        services.AddScoped<EmailNotificationDeliveryAdapter>();
        services.AddScoped<INotificationDeliveryAdapter>(
            provider => provider.GetRequiredService<EmailNotificationDeliveryAdapter>());

        return services;
    }
}
|
||||
@@ -6,8 +6,12 @@ namespace ScadaLink.NotificationService;
|
||||
/// MailKit authentication exceptions can contain server responses that quote the
|
||||
/// supplied credentials; this prevents a password, client secret, or OAuth2 token
|
||||
/// from leaking into the operational logs.
|
||||
/// <para>
|
||||
/// Public so the central Notification Outbox's <c>EmailNotificationDeliveryAdapter</c>
|
||||
/// can share this exact redaction logic rather than carry a divergent copy.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
internal static class CredentialRedactor
|
||||
public static class CredentialRedactor
|
||||
{
|
||||
private const string Mask = "***REDACTED***";
|
||||
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
using MimeKit;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
|
||||
namespace ScadaLink.NotificationService;
|
||||
|
||||
/// <summary>
/// NS-008: Validates the sender and recipient email addresses before an SMTP
/// delivery is attempted, so a malformed address surfaces as a clean error
/// string rather than a <c>ParseException</c> escaping the delivery path.
/// <para>
/// Public so the central Notification Outbox's <c>EmailNotificationDeliveryAdapter</c>
/// can share this exact pre-send validation rather than carry a divergent copy.
/// </para>
/// </summary>
public static class EmailAddressValidator
{
    /// <summary>
    /// Validates the sender address first and then every recipient address.
    /// </summary>
    /// <param name="fromAddress">The sender (from) address to check.</param>
    /// <param name="recipients">The recipients whose <c>EmailAddress</c> is checked.</param>
    /// <returns>
    /// A human-readable error string naming the invalid sender, or listing all invalid
    /// recipient addresses; <c>null</c> when every address parses. A null or blank
    /// address counts as invalid.
    /// </returns>
    public static string? ValidateAddresses(
        string fromAddress, IReadOnlyList<NotificationRecipient> recipients)
    {
        if (!IsParseable(fromAddress))
        {
            return $"Invalid sender (from) email address: '{fromAddress}'";
        }

        var invalid = recipients
            .Where(r => !IsParseable(r.EmailAddress))
            .Select(r => r.EmailAddress)
            .ToList();

        return invalid.Count > 0
            ? $"Invalid recipient email address(es): {string.Join(", ", invalid)}"
            : null;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="address"/> is non-blank and parses as a
    /// mailbox address. The blank check runs first because <c>MailboxAddress.TryParse</c>
    /// rejects a null argument by throwing, which would defeat the purpose of returning
    /// a clean error string.
    /// </summary>
    private static bool IsParseable(string? address) =>
        !string.IsNullOrWhiteSpace(address) && MailboxAddress.TryParse(address, out _);
}
|
||||
@@ -1,10 +1,6 @@
|
||||
using System.Net.Sockets;
|
||||
using System.Text.Json;
|
||||
using MailKit;
|
||||
using MailKit.Net.Smtp;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MimeKit;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
@@ -94,7 +90,7 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
// malformed address previously caused MailboxAddress.Parse to throw a
|
||||
// ParseException that escaped SendAsync unhandled; it must instead produce a
|
||||
// clean NotificationResult the calling script can handle.
|
||||
var addressError = ValidateAddresses(smtpConfig.FromAddress, recipients);
|
||||
var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients);
|
||||
if (addressError != null)
|
||||
{
|
||||
_logger.LogWarning("Notification to list {List} has invalid addresses: {Reason}", listName, addressError);
|
||||
@@ -121,7 +117,7 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
// NS-002: a caller-requested cancellation propagates; it is not buffered.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (IsTransientSmtpError(ex, cancellationToken))
|
||||
catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken))
|
||||
{
|
||||
// WP-12: Transient SMTP failure — hand to S&F.
|
||||
// NS-009: scrub credential fragments before logging.
|
||||
@@ -156,7 +152,7 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// NS-015: a failure that ClassifySmtpError does not recognise (Unknown) —
|
||||
// NS-015: a failure that SmtpErrorClassifier does not recognise (Unknown) —
|
||||
// most importantly an OAuth2 token-fetch failure (HttpRequestException
|
||||
// from EnsureSuccessStatusCode, or InvalidOperationException from a
|
||||
// malformed credential triple) — used to fall through all the catch
|
||||
@@ -228,7 +224,7 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
}
|
||||
|
||||
// NS-008: a malformed address cannot be fixed by retrying — park it.
|
||||
var addressError = ValidateAddresses(smtpConfig.FromAddress, recipients);
|
||||
var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients);
|
||||
if (addressError != null)
|
||||
{
|
||||
_logger.LogError(
|
||||
@@ -256,14 +252,14 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
// permanent failure — let it propagate so the engine does not park.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (IsTransientSmtpError(ex, cancellationToken))
|
||||
catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken))
|
||||
{
|
||||
// A typed transient SMTP error: re-throw so the S&F engine retries.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// NS-014: an exception ClassifySmtpError does not recognise (Unknown) —
|
||||
// NS-014: an exception SmtpErrorClassifier does not recognise (Unknown) —
|
||||
// chiefly an OAuth2 token-fetch failure — used to escape this handler.
|
||||
// The S&F engine treats ANY thrown exception as transient, so a
|
||||
// permanently-broken config (bad client secret, malformed credential
|
||||
@@ -347,28 +343,6 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
GC.SuppressFinalize(this);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// NS-008: Validates the sender and recipient email addresses, returning a
|
||||
/// human-readable error string if any is malformed, or null if all parse.
|
||||
/// </summary>
|
||||
internal static string? ValidateAddresses(
|
||||
string fromAddress, IReadOnlyList<NotificationRecipient> recipients)
|
||||
{
|
||||
if (!MailboxAddress.TryParse(fromAddress, out _))
|
||||
{
|
||||
return $"Invalid sender (from) email address: '{fromAddress}'";
|
||||
}
|
||||
|
||||
var invalid = recipients
|
||||
.Where(r => !MailboxAddress.TryParse(r.EmailAddress, out _))
|
||||
.Select(r => r.EmailAddress)
|
||||
.ToList();
|
||||
|
||||
return invalid.Count > 0
|
||||
? $"Invalid recipient email address(es): {string.Join(", ", invalid)}"
|
||||
: null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Delivers an email via SMTP. Throws on failure (transient errors and
|
||||
/// <see cref="SmtpPermanentException"/> propagate; the caller classifies them).
|
||||
@@ -420,14 +394,14 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
// not be misclassified as a transient SMTP failure and buffered for retry.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Permanent
|
||||
catch (Exception ex) when (SmtpErrorClassifier.Classify(ex, cancellationToken) == SmtpErrorClass.Permanent
|
||||
&& ex is not SmtpPermanentException)
|
||||
{
|
||||
// NS-003: Permanent SMTP failure (5xx) — surface a typed permanent exception.
|
||||
throw new SmtpPermanentException(ex.Message, ex);
|
||||
}
|
||||
// Transient and SmtpPermanentException both propagate unchanged: SendAsync's
|
||||
// catch filters (SmtpPermanentException / IsTransientSmtpError) handle them.
|
||||
// catch filters (SmtpPermanentException / SmtpErrorClassifier.IsTransient) handle them.
|
||||
finally
|
||||
{
|
||||
// NS-010: always tear the connection down, regardless of outcome. The
|
||||
@@ -451,64 +425,4 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
|
||||
limiter.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private enum SmtpErrorClass
|
||||
{
|
||||
/// <summary>Cancellation or an unrecognised exception — caller decides.</summary>
|
||||
Unknown,
|
||||
/// <summary>Retryable failure (4xx, connection/socket/protocol error, timeout).</summary>
|
||||
Transient,
|
||||
/// <summary>Non-retryable failure (5xx) — must be returned to the script.</summary>
|
||||
Permanent,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// NS-002/NS-003: Classifies an SMTP failure using MailKit's typed exceptions and
|
||||
/// the numeric <see cref="SmtpStatusCode"/> rather than locale-dependent substring
|
||||
/// matching on the exception message. A cancellation requested by the caller is
|
||||
/// never treated as a transient SMTP error.
|
||||
/// </summary>
|
||||
private static SmtpErrorClass ClassifySmtpError(Exception ex, CancellationToken cancellationToken)
|
||||
{
|
||||
// A deliberate cancellation is not an SMTP error at all.
|
||||
if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
return SmtpErrorClass.Unknown;
|
||||
}
|
||||
|
||||
// MailKit reports SMTP command failures with the real status code; the
|
||||
// SmtpStatusCode enum's underlying value is the numeric SMTP reply code.
|
||||
if (ex is SmtpCommandException command)
|
||||
{
|
||||
var code = (int)command.StatusCode;
|
||||
if (code >= 400 && code < 500)
|
||||
{
|
||||
return SmtpErrorClass.Transient;
|
||||
}
|
||||
|
||||
if (code >= 500 && code < 600)
|
||||
{
|
||||
return SmtpErrorClass.Permanent;
|
||||
}
|
||||
|
||||
return SmtpErrorClass.Unknown;
|
||||
}
|
||||
|
||||
// Protocol errors, a dropped/unavailable service, socket failures and
|
||||
// timeouts are all retryable — the message has not been rejected.
|
||||
if (ex is SmtpProtocolException
|
||||
or ServiceNotConnectedException
|
||||
or SocketException
|
||||
or TimeoutException)
|
||||
{
|
||||
return SmtpErrorClass.Transient;
|
||||
}
|
||||
|
||||
return SmtpErrorClass.Unknown;
|
||||
}
|
||||
|
||||
private static bool IsTransientSmtpError(Exception ex, CancellationToken cancellationToken)
|
||||
{
|
||||
return ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Transient;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
using System.Net.Sockets;
|
||||
using MailKit;
|
||||
using MailKit.Net.Smtp;
|
||||
|
||||
namespace ScadaLink.NotificationService;
|
||||
|
||||
/// <summary>
/// NS-002/NS-003: The outcome category assigned to an SMTP delivery failure.
/// The category determines whether the failure is retried or surfaced to the
/// caller, which makes it part of the system's correctness-relevant behaviour.
/// </summary>
public enum SmtpErrorClass
{
    /// <summary>Cancellation or an unrecognised exception — caller decides.</summary>
    Unknown = 0,

    /// <summary>Retryable failure (4xx, connection/socket/protocol error, timeout).</summary>
    Transient = 1,

    /// <summary>Non-retryable failure (5xx) — must not be retried.</summary>
    Permanent = 2,
}
|
||||
|
||||
/// <summary>
/// NS-002/NS-003: Classifies an SMTP failure using MailKit's typed exceptions and
/// the numeric <see cref="SmtpStatusCode"/> rather than locale-dependent substring
/// matching on the exception message.
/// <para>
/// Public and shared: both <see cref="NotificationDeliveryService"/> (store-and-forward
/// delivery) and the central Notification Outbox's <c>EmailNotificationDeliveryAdapter</c>
/// route every SMTP failure through this single policy, so a transient/permanent
/// boundary change cannot diverge between the two delivery paths.
/// </para>
/// </summary>
public static class SmtpErrorClassifier
{
    /// <summary>
    /// Classifies an SMTP failure. A cancellation requested by the caller is never
    /// treated as a transient SMTP error.
    /// </summary>
    /// <param name="ex">The exception thrown by the SMTP send sequence.</param>
    /// <param name="cancellationToken">
    /// The token governing the send; a requested cancellation classifies as
    /// <see cref="SmtpErrorClass.Unknown"/> so the caller can re-throw it.
    /// </param>
    /// <returns>
    /// <see cref="SmtpErrorClass.Transient"/> for a 4xx reply, a protocol /
    /// not-connected / socket / timeout failure, or an I/O failure caused by a
    /// socket error; <see cref="SmtpErrorClass.Permanent"/> for a 5xx reply;
    /// <see cref="SmtpErrorClass.Unknown"/> for everything else.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="ex"/> is null.</exception>
    public static SmtpErrorClass Classify(Exception ex, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(ex);

        // A deliberate cancellation is not an SMTP error at all.
        if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested)
        {
            return SmtpErrorClass.Unknown;
        }

        // MailKit reports SMTP command failures with the real status code; the
        // SmtpStatusCode enum's underlying value is the numeric SMTP reply code.
        if (ex is SmtpCommandException command)
        {
            return (int)command.StatusCode switch
            {
                >= 400 and < 500 => SmtpErrorClass.Transient,
                >= 500 and < 600 => SmtpErrorClass.Permanent,
                _ => SmtpErrorClass.Unknown,
            };
        }

        // Protocol errors, a dropped/unavailable service, socket failures and
        // timeouts are all retryable — the message has not been rejected.
        if (ex is SmtpProtocolException
            or ServiceNotConnectedException
            or SocketException
            or TimeoutException)
        {
            return SmtpErrorClass.Transient;
        }

        // A socket failure raised while reading/writing the connection stream
        // reaches the caller wrapped in an IOException, so the bare SocketException
        // test above does not see it. It is the same retryable condition. Only the
        // socket-caused case is matched so unrelated I/O errors stay Unknown.
        if (ex is System.IO.IOException { InnerException: SocketException })
        {
            return SmtpErrorClass.Transient;
        }

        return SmtpErrorClass.Unknown;
    }

    /// <summary>
    /// Convenience predicate: true when <see cref="Classify"/> returns
    /// <see cref="SmtpErrorClass.Transient"/>.
    /// </summary>
    /// <param name="ex">The exception thrown by the SMTP send sequence.</param>
    /// <param name="cancellationToken">The token governing the send.</param>
    public static bool IsTransient(Exception ex, CancellationToken cancellationToken)
        => Classify(ex, cancellationToken) == SmtpErrorClass.Transient;
}
|
||||
@@ -8,6 +8,7 @@ using ScadaLink.Commons.Types;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.SiteEventLogging;
|
||||
using ScadaLink.SiteRuntime.Scripts;
|
||||
using ScadaLink.StoreAndForward;
|
||||
|
||||
namespace ScadaLink.SiteRuntime.Actors;
|
||||
|
||||
@@ -78,6 +79,11 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
// starve the global pool and stall Akka dispatchers / HTTP handling.
|
||||
var scheduler = ScriptExecutionScheduler.Shared(options);
|
||||
|
||||
// Notification Outbox: the site communication actor that Notify.Status queries
|
||||
// central through. Resolved by actor path so the Notify helper does not need an
|
||||
// IActorRef threaded all the way down from the host wiring.
|
||||
var siteCommunicationActor = Context.System.ActorSelection("/user/site-communication");
|
||||
|
||||
// CTS must be created inside the async lambda so it outlives this method
|
||||
_ = Task.Factory.StartNew(async () =>
|
||||
{
|
||||
@@ -91,14 +97,19 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
// Resolve integration services from DI (scoped lifetime)
|
||||
IExternalSystemClient? externalSystemClient = null;
|
||||
IDatabaseGateway? databaseGateway = null;
|
||||
INotificationDeliveryService? notificationService = null;
|
||||
// Notification Outbox: the S&F engine is a singleton; the site identity
|
||||
// provider supplies the site id stamped on enqueued notifications.
|
||||
StoreAndForwardService? storeAndForward = null;
|
||||
var siteId = string.Empty;
|
||||
|
||||
if (serviceProvider != null)
|
||||
{
|
||||
serviceScope = serviceProvider.CreateScope();
|
||||
externalSystemClient = serviceScope.ServiceProvider.GetService<IExternalSystemClient>();
|
||||
databaseGateway = serviceScope.ServiceProvider.GetService<IDatabaseGateway>();
|
||||
notificationService = serviceScope.ServiceProvider.GetService<INotificationDeliveryService>();
|
||||
storeAndForward = serviceScope.ServiceProvider.GetService<StoreAndForwardService>();
|
||||
siteId = serviceScope.ServiceProvider.GetService<ISiteIdentityProvider>()?.SiteId
|
||||
?? string.Empty;
|
||||
}
|
||||
|
||||
var context = new ScriptRuntimeContext(
|
||||
@@ -112,7 +123,12 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
logger,
|
||||
externalSystemClient,
|
||||
databaseGateway,
|
||||
notificationService);
|
||||
storeAndForward,
|
||||
siteCommunicationActor,
|
||||
siteId,
|
||||
// Notification Outbox (FU3): stamp the executing script onto outbound
|
||||
// notifications using the Site Event Logging "Source" convention.
|
||||
sourceScript: $"ScriptActor:{scriptName}");
|
||||
|
||||
var globals = new ScriptGlobals
|
||||
{
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
using System.Text.Json;
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Messages.Instance;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Messages.ScriptExecution;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.StoreAndForward;
|
||||
|
||||
namespace ScadaLink.SiteRuntime.Scripts;
|
||||
|
||||
@@ -46,9 +50,30 @@ public class ScriptRuntimeContext
|
||||
private readonly IDatabaseGateway? _databaseGateway;
|
||||
|
||||
/// <summary>
|
||||
/// WP-13: Notification delivery for Notify.To().Send().
|
||||
/// Notification Outbox: the site Store-and-Forward Engine that <c>Notify.Send</c>
|
||||
/// enqueues notifications into. The S&F engine forwards them to central.
|
||||
/// </summary>
|
||||
private readonly INotificationDeliveryService? _notificationService;
|
||||
private readonly StoreAndForwardService? _storeAndForward;
|
||||
|
||||
/// <summary>
|
||||
/// Notification Outbox: the site communication actor that <c>Notify.Status</c>
|
||||
/// queries central through (via the ClusterClient command/control transport).
|
||||
/// </summary>
|
||||
private readonly ICanTell? _siteCommunicationActor;
|
||||
|
||||
/// <summary>
|
||||
/// Notification Outbox: this site's identifier, stamped on enqueued notifications.
|
||||
/// </summary>
|
||||
private readonly string _siteId;
|
||||
|
||||
/// <summary>
|
||||
/// Notification Outbox (FU3): identifier of the script currently executing in this
|
||||
/// context — stamped onto <c>NotificationSubmit.SourceScript</c> for the central
|
||||
/// audit trail. Uses the Site Event Logging "Source" convention
|
||||
/// (<c>"ScriptActor:<scriptName>"</c>). Null when no single script owns the
|
||||
/// context (e.g. alarm on-trigger paths that do not wire the Notify outbox).
|
||||
/// </summary>
|
||||
private readonly string? _sourceScript;
|
||||
|
||||
public ScriptRuntimeContext(
|
||||
IActorRef instanceActor,
|
||||
@@ -61,7 +86,10 @@ public class ScriptRuntimeContext
|
||||
ILogger logger,
|
||||
IExternalSystemClient? externalSystemClient = null,
|
||||
IDatabaseGateway? databaseGateway = null,
|
||||
INotificationDeliveryService? notificationService = null)
|
||||
StoreAndForwardService? storeAndForward = null,
|
||||
ICanTell? siteCommunicationActor = null,
|
||||
string siteId = "",
|
||||
string? sourceScript = null)
|
||||
{
|
||||
_instanceActor = instanceActor;
|
||||
_self = self;
|
||||
@@ -73,7 +101,10 @@ public class ScriptRuntimeContext
|
||||
_logger = logger;
|
||||
_externalSystemClient = externalSystemClient;
|
||||
_databaseGateway = databaseGateway;
|
||||
_notificationService = notificationService;
|
||||
_storeAndForward = storeAndForward;
|
||||
_siteCommunicationActor = siteCommunicationActor;
|
||||
_siteId = siteId;
|
||||
_sourceScript = sourceScript;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -183,10 +214,13 @@ public class ScriptRuntimeContext
|
||||
public DatabaseHelper Database => new(_databaseGateway, _instanceName, _logger);
|
||||
|
||||
/// <summary>
|
||||
/// WP-13: Provides access to notification delivery.
|
||||
/// Notify.To("listName").Send("subject", "message")
|
||||
/// Provides access to the Notification Outbox API.
|
||||
/// <c>Notify.To("listName").Send("subject", "message")</c> enqueues a notification
|
||||
/// for central delivery and returns its <c>NotificationId</c>;
|
||||
/// <c>Notify.Status(id)</c> queries the delivery status of that notification.
|
||||
/// </summary>
|
||||
public NotifyHelper Notify => new(_notificationService, _instanceName, _logger);
|
||||
public NotifyHelper Notify => new(
|
||||
_storeAndForward, _siteCommunicationActor, _siteId, _instanceName, _sourceScript, _askTimeout, _logger);
|
||||
|
||||
/// <summary>
|
||||
/// Helper class for Scripts.CallShared() syntax.
|
||||
@@ -319,54 +353,197 @@ public class ScriptRuntimeContext
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-13: Helper for Notify.To("listName").Send("subject", "message") syntax.
|
||||
/// Notification Outbox: helper for the <c>Notify</c> script API.
|
||||
///
|
||||
/// In the outbox design the site no longer delivers notification email inline.
|
||||
/// <c>Notify.To("listName").Send(...)</c> enqueues the notification into the site
|
||||
/// Store-and-Forward Engine — which forwards it to central — and returns a
|
||||
/// <c>NotificationId</c> handle immediately. <c>Notify.Status(id)</c> later queries
|
||||
/// the delivery status of that notification.
|
||||
/// </summary>
|
||||
public class NotifyHelper
|
||||
{
|
||||
private readonly INotificationDeliveryService? _service;
|
||||
private readonly StoreAndForwardService? _storeAndForward;
|
||||
private readonly ICanTell? _siteCommunicationActor;
|
||||
private readonly string _siteId;
|
||||
private readonly string _instanceName;
|
||||
private readonly string? _sourceScript;
|
||||
private readonly TimeSpan _askTimeout;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
internal NotifyHelper(INotificationDeliveryService? service, string instanceName, ILogger logger)
|
||||
internal NotifyHelper(
|
||||
StoreAndForwardService? storeAndForward,
|
||||
ICanTell? siteCommunicationActor,
|
||||
string siteId,
|
||||
string instanceName,
|
||||
string? sourceScript,
|
||||
TimeSpan askTimeout,
|
||||
ILogger logger)
|
||||
{
|
||||
_service = service;
|
||||
_storeAndForward = storeAndForward;
|
||||
_siteCommunicationActor = siteCommunicationActor;
|
||||
_siteId = siteId;
|
||||
_instanceName = instanceName;
|
||||
_sourceScript = sourceScript;
|
||||
_askTimeout = askTimeout;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Selects the notification list to send to.
|
||||
/// </summary>
|
||||
public NotifyTarget To(string listName)
|
||||
{
|
||||
return new NotifyTarget(listName, _service, _instanceName, _logger);
|
||||
return new NotifyTarget(
|
||||
listName, _storeAndForward, _siteId, _instanceName, _sourceScript, _logger);
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up the delivery status of a notification that was sent earlier.
///
/// Central is asked first, through the site communication actor. When central
/// reports a row (<c>Found: true</c>) its status, retry count, last error and
/// delivery time are returned as-is. When central has no row — or could not be
/// reached at all — the site Store-and-Forward buffer is consulted: a message
/// still buffered under this id is reported as the site-local <c>Forwarding</c>
/// state. If neither central nor the buffer knows the id, the status is
/// <c>Unknown</c>.
/// </summary>
/// <param name="notificationId">The id of the notification to look up.</param>
/// <exception cref="InvalidOperationException">
/// No site communication actor was supplied to this helper.
/// </exception>
public async Task<NotificationDeliveryStatus> Status(string notificationId)
{
    var communicationActor = _siteCommunicationActor
        ?? throw new InvalidOperationException(
            "Notification status query is not available — site communication actor not wired");

    var correlationId = Guid.NewGuid().ToString();
    var statusQuery = new NotificationStatusQuery(correlationId, notificationId);

    NotificationStatusResponse centralReply;
    try
    {
        centralReply = await communicationActor
            .Ask<NotificationStatusResponse>(statusQuery, _askTimeout);
    }
    catch (Exception ex)
    {
        // The query never got an answer. Substitute a "not found" reply so the
        // local buffer check below still runs: a buffered id means Forwarding.
        _logger.LogWarning(ex,
            "Notification status query for {NotificationId} did not reach central",
            notificationId);
        centralReply = new NotificationStatusResponse(
            correlationId, Found: false, Status: "Unknown",
            RetryCount: 0, LastError: null, DeliveredAt: null);
    }

    // Central owns the row: pass its view straight through.
    if (centralReply.Found)
    {
        return new NotificationDeliveryStatus(
            centralReply.Status, centralReply.RetryCount, centralReply.LastError, centralReply.DeliveredAt);
    }

    // No central row and no local buffer to consult: nothing is known about the id.
    if (_storeAndForward == null)
    {
        return new NotificationDeliveryStatus("Unknown", 0, null, null);
    }

    // Still sitting in the site buffer means it is in transit to central;
    // otherwise the id is unknown to both sides.
    var bufferedMessage = await _storeAndForward.GetMessageByIdAsync(notificationId);
    return bufferedMessage != null
        ? new NotificationDeliveryStatus(
            "Forwarding", bufferedMessage.RetryCount, bufferedMessage.LastError, DeliveredAt: null)
        : new NotificationDeliveryStatus("Unknown", 0, null, null);
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-13: Target for Notify.To("listName").Send("subject", "message").
|
||||
/// Notification Outbox: target for <c>Notify.To("listName").Send(...)</c>.
|
||||
/// </summary>
|
||||
public class NotifyTarget
|
||||
{
|
||||
private readonly string _listName;
|
||||
private readonly INotificationDeliveryService? _service;
|
||||
private readonly StoreAndForwardService? _storeAndForward;
|
||||
private readonly string _siteId;
|
||||
private readonly string _instanceName;
|
||||
private readonly string? _sourceScript;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
internal NotifyTarget(string listName, INotificationDeliveryService? service, string instanceName, ILogger logger)
|
||||
internal NotifyTarget(
|
||||
string listName,
|
||||
StoreAndForwardService? storeAndForward,
|
||||
string siteId,
|
||||
string instanceName,
|
||||
string? sourceScript,
|
||||
ILogger logger)
|
||||
{
|
||||
_listName = listName;
|
||||
_service = service;
|
||||
_storeAndForward = storeAndForward;
|
||||
_siteId = siteId;
|
||||
_instanceName = instanceName;
|
||||
_sourceScript = sourceScript;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public async Task<NotificationResult> Send(
|
||||
/// <summary>
|
||||
/// Enqueues a notification for central delivery and returns its
|
||||
/// <c>NotificationId</c> immediately.
|
||||
///
|
||||
/// The notification is buffered into the site Store-and-Forward Engine under the
|
||||
/// <see cref="StoreAndForwardCategory.Notification"/> category; the S&F
|
||||
/// engine's <c>NotificationForwarder</c> forwards it to central and treats
|
||||
/// central's ack as the delivery outcome. The returned <c>NotificationId</c> is
|
||||
/// the single idempotency key end-to-end: it is the S&F message id, it is
|
||||
/// carried inside the buffered payload, and it is the id the forwarder submits to
|
||||
/// central. Pass it to <see cref="NotifyHelper.Status"/> to track delivery.
|
||||
/// </summary>
|
||||
public async Task<string> Send(
|
||||
string subject,
|
||||
string message,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (_service == null)
|
||||
throw new InvalidOperationException("Notification service not available");
|
||||
if (_storeAndForward == null)
|
||||
throw new InvalidOperationException(
|
||||
"Notification store-and-forward engine not available");
|
||||
|
||||
return await _service.SendAsync(_listName, subject, message, _instanceName, cancellationToken);
|
||||
// The script controls the idempotency key: generate the NotificationId here,
|
||||
// use it as the S&F message id, and carry it inside the buffered payload so
|
||||
// the forwarder submits the same id to central on every retry.
|
||||
var notificationId = Guid.NewGuid().ToString("N");
|
||||
|
||||
var payload = new NotificationSubmit(
|
||||
NotificationId: notificationId,
|
||||
ListName: _listName,
|
||||
Subject: subject,
|
||||
Body: message,
|
||||
// SourceSiteId is re-stamped by the forwarder from its own site id; this
|
||||
// value is the best-effort site id known to the script runtime.
|
||||
SourceSiteId: _siteId,
|
||||
SourceInstanceId: _instanceName,
|
||||
// SourceScript (FU3): identifier of the script that raised this
|
||||
// notification, threaded down from the script-execution context for the
|
||||
// central audit trail. Null when no single script owns the context.
|
||||
SourceScript: _sourceScript,
|
||||
SiteEnqueuedAt: DateTimeOffset.UtcNow);
|
||||
|
||||
var payloadJson = JsonSerializer.Serialize(payload);
|
||||
|
||||
// The S&F engine assigns its own GUID to the message; pin the message id to
|
||||
// the NotificationId so the buffer can be queried by it (Notify.Status) and
|
||||
// the forwarder's idempotency key matches the buffered row.
|
||||
await _storeAndForward.EnqueueAsync(
|
||||
StoreAndForwardCategory.Notification,
|
||||
target: _listName,
|
||||
payloadJson: payloadJson,
|
||||
originInstanceName: _instanceName,
|
||||
messageId: notificationId);
|
||||
|
||||
_logger.LogDebug(
|
||||
"Notify enqueued notification {NotificationId} to list '{List}' for central delivery",
|
||||
notificationId, _listName);
|
||||
|
||||
return notificationId;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
using System.Text.Json;
|
||||
using Akka.Actor;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
|
||||
namespace ScadaLink.StoreAndForward;
|
||||
|
||||
/// <summary>
/// Notification Outbox: the site Store-and-Forward delivery handler for the
/// <see cref="ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification"/>
/// category.
///
/// In the outbox design the site no longer sends notification email itself.
/// "Delivering" a buffered notification means forwarding it to the central cluster
/// and treating central's <see cref="NotificationSubmitAck"/> as the outcome:
/// <list type="bullet">
/// <item><description>ack <c>Accepted</c> → <see cref="DeliverAsync"/> returns
/// <c>true</c>; the S&amp;F engine removes the message from the buffer.</description></item>
/// <item><description>ack not <c>Accepted</c>, or the Ask times out / fails →
/// <see cref="DeliverAsync"/> throws; the S&amp;F engine treats any thrown
/// exception as transient and retries the forward at the fixed interval.</description></item>
/// <item><description>missing or unreadable payload → <see cref="DeliverAsync"/>
/// returns <c>false</c> so the message is parked instead of retried.</description></item>
/// </list>
///
/// The forward travels over the ClusterClient command/control transport: the handler
/// <see cref="ActorRefImplicitSenderExtensions.Ask{T}(ICanTell, object, TimeSpan?)">Asks</see>
/// the site communication actor, which wraps the message in a
/// <c>ClusterClient.Send("/user/central-communication", …)</c> and routes central's
/// reply straight back to this Ask.
/// </summary>
public sealed class NotificationForwarder
{
    private readonly IActorRef _siteCommunicationActor;
    private readonly string _sourceSiteId;
    private readonly TimeSpan _forwardTimeout;

    /// <summary>
    /// Creates a forwarder bound to one site communication actor and site id.
    /// </summary>
    /// <param name="siteCommunicationActor">
    /// The site communication actor. It forwards a <see cref="NotificationSubmit"/> to
    /// central via the registered ClusterClient and replies with the
    /// <see cref="NotificationSubmitAck"/>.
    /// </param>
    /// <param name="sourceSiteId">This site's identifier, stamped on every submit.</param>
    /// <param name="forwardTimeout">
    /// How long to wait for central's ack before treating the forward as a transient
    /// failure. Sourced from host configuration.
    /// </param>
    public NotificationForwarder(
        IActorRef siteCommunicationActor,
        string sourceSiteId,
        TimeSpan forwardTimeout)
    {
        _siteCommunicationActor = siteCommunicationActor;
        _sourceSiteId = sourceSiteId;
        _forwardTimeout = forwardTimeout;
    }

    /// <summary>
    /// Store-and-Forward delivery handler entry point — matches the
    /// <c>Func&lt;StoreAndForwardMessage, Task&lt;bool&gt;&gt;</c> handler contract.
    /// Returns <c>true</c> when central accepts the notification; throws on a
    /// non-accepted ack or an Ask timeout/failure so the engine retries.
    /// </summary>
    /// <param name="message">The buffered notification message to forward.</param>
    /// <returns>
    /// <c>true</c> when central acknowledged the notification as accepted;
    /// <c>false</c> when the buffered payload is missing or unreadable.
    /// </returns>
    /// <exception cref="NotificationForwardException">
    /// Central replied with a non-accepted ack.
    /// </exception>
    public async Task<bool> DeliverAsync(StoreAndForwardMessage message)
    {
        // An unreadable payload cannot be fixed by retrying — park it (return false),
        // mirroring how the former SMTP handler treated a corrupt buffered payload.
        if (!TryBuildSubmit(message, out var submit))
        {
            return false;
        }

        // The reply may legitimately be a non-accepted ack, so it is not requested as
        // a status-failing Ask: ask for the bare NotificationSubmitAck and classify it
        // here. An Ask timeout surfaces as a thrown exception, which — like any other
        // thrown exception — the S&F engine treats as transient.
        var ack = await _siteCommunicationActor
            .Ask<NotificationSubmitAck>(submit, _forwardTimeout)
            .ConfigureAwait(false);

        if (ack.Accepted)
        {
            return true;
        }

        // A non-accepted ack is a transient failure: central could not persist the
        // notification right now. Throw so the engine keeps buffering and retries.
        throw new NotificationForwardException(
            $"Central rejected notification {submit.NotificationId}: {ack.Error ?? "no detail"}");
    }

    /// <summary>
    /// Maps a buffered S&amp;F notification message onto the <see cref="NotificationSubmit"/>
    /// forwarded to central, returning <c>false</c> if the payload is missing or unreadable.
    ///
    /// The buffered payload IS a serialized <see cref="NotificationSubmit"/> written by
    /// the site <c>Notify.Send</c> enqueue path (Task 19). Its
    /// <see cref="NotificationSubmit.NotificationId"/> is the central idempotency key —
    /// it was generated by the script, equals the buffered row's
    /// <see cref="StoreAndForwardMessage.Id"/>, and is stable across every retry. The
    /// forwarder forwards the payload as-is except that it re-stamps the fields it
    /// authoritatively owns: <see cref="NotificationSubmit.SourceSiteId"/> (this site's
    /// id) and <see cref="NotificationSubmit.SourceInstanceId"/> (the buffered row's
    /// origin instance), and it falls the list name back to the S&amp;F
    /// <see cref="StoreAndForwardMessage.Target"/> when the payload list name is blank.
    /// </summary>
    /// <param name="message">The buffered message whose payload is decoded.</param>
    /// <param name="submit">
    /// The submit to forward when the method returns <c>true</c>; not usable otherwise.
    /// </param>
    private bool TryBuildSubmit(StoreAndForwardMessage message, out NotificationSubmit submit)
    {
        submit = null!;

        // A missing payload must be parked like any other unreadable one. Without this
        // guard a null payload makes Deserialize throw ArgumentNullException, which the
        // engine would treat as transient and retry indefinitely.
        if (string.IsNullOrWhiteSpace(message.PayloadJson))
        {
            return false;
        }

        NotificationSubmit? payload;
        try
        {
            payload = JsonSerializer.Deserialize<NotificationSubmit>(message.PayloadJson);
        }
        catch (JsonException)
        {
            return false;
        }

        // The JSON literal "null" deserializes successfully to a null reference.
        if (payload == null)
        {
            return false;
        }

        submit = payload with
        {
            // The NotificationId is the script-generated idempotency key carried in the
            // payload. Defend against a payload missing it by falling back to the
            // buffered row id, which the enqueue path pins to the same value.
            NotificationId = string.IsNullOrEmpty(payload.NotificationId)
                ? message.Id
                : payload.NotificationId,
            // A null, empty OR whitespace-only ListName falls back to the S&F Target —
            // so a blank list name is never forwarded to central.
            ListName = string.IsNullOrWhiteSpace(payload.ListName) ? message.Target : payload.ListName,
            // SourceSiteId/SourceInstanceId are authoritatively owned by the site: the
            // forwarder knows the real site id, and the buffered row records the origin
            // instance even after the instance is deleted.
            SourceSiteId = _sourceSiteId,
            SourceInstanceId = message.OriginInstanceName,
        };
        return true;
    }
}
|
||||
|
||||
/// <summary>
/// Raised by <see cref="NotificationForwarder"/> on a transient forward failure —
/// a non-accepted central ack. The Store-and-Forward engine treats any thrown
/// exception as transient and retries the forward at the fixed interval.
/// </summary>
public sealed class NotificationForwardException : Exception
{
    /// <summary>Creates the exception with a description of the failed forward.</summary>
    /// <param name="message">Human-readable reason the forward was not accepted.</param>
    public NotificationForwardException(string message) : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with a description of the failed forward and the
    /// underlying exception that caused it, so the original cause is not lost.
    /// </summary>
    /// <param name="message">Human-readable reason the forward was not accepted.</param>
    /// <param name="innerException">The exception that triggered this failure.</param>
    public NotificationForwardException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
@@ -149,6 +149,13 @@ public class StoreAndForwardService
|
||||
/// When <c>false</c>, the caller has already made its own delivery attempt and the
|
||||
/// message is buffered directly for the retry sweep (the handler is not invoked here).
|
||||
/// </param>
|
||||
/// <param name="messageId">
|
||||
/// An explicit, caller-supplied message id. <c>null</c> (the default) makes the
|
||||
/// service mint a fresh GUID. The Notification Outbox enqueue path supplies its own
|
||||
/// id so the script-generated <c>NotificationId</c> is the single idempotency key —
|
||||
/// it is the buffered row's <see cref="StoreAndForwardMessage.Id"/>, it is carried
|
||||
/// inside the payload, and it is the id the forwarder submits to central.
|
||||
/// </param>
|
||||
public async Task<StoreAndForwardResult> EnqueueAsync(
|
||||
StoreAndForwardCategory category,
|
||||
string target,
|
||||
@@ -156,11 +163,12 @@ public class StoreAndForwardService
|
||||
string? originInstanceName = null,
|
||||
int? maxRetries = null,
|
||||
TimeSpan? retryInterval = null,
|
||||
bool attemptImmediateDelivery = true)
|
||||
bool attemptImmediateDelivery = true,
|
||||
string? messageId = null)
|
||||
{
|
||||
var message = new StoreAndForwardMessage
|
||||
{
|
||||
Id = Guid.NewGuid().ToString("N"),
|
||||
Id = messageId ?? Guid.NewGuid().ToString("N"),
|
||||
Category = category,
|
||||
Target = target,
|
||||
PayloadJson = payloadJson,
|
||||
@@ -430,6 +438,17 @@ public class StoreAndForwardService
|
||||
return await _storage.GetMessageCountByOriginInstanceAsync(instanceName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Notification Outbox: fetches a buffered message by id from the underlying storage.
/// <c>Notify.Status</c> relies on this to recognise a notification that is still in
/// transit at the site — central reports it not-found while the S&amp;F buffer still
/// holds it, which is the site-local <c>Forwarding</c> state.
/// </summary>
/// <param name="messageId">Id of the buffered message to look up.</param>
/// <returns>
/// The buffered message, or <c>null</c> when it is not (or no longer) in the buffer.
/// </returns>
public async Task<StoreAndForwardMessage?> GetMessageByIdAsync(string messageId)
    => await _storage.GetMessageByIdAsync(messageId);
|
||||
|
||||
/// <summary>
|
||||
/// WP-14: Raises the S&F activity notification. StoreAndForward-009: the
|
||||
/// delegate is snapshotted (so a concurrent unsubscribe cannot NRE) and every
|
||||
|
||||
@@ -27,13 +27,23 @@ public class NavigationTests
|
||||
[InlineData("Sites", "/admin/sites")]
|
||||
[InlineData("API Keys", "/admin/api-keys")]
|
||||
[InlineData("LDAP Mappings", "/admin/ldap-mappings")]
|
||||
[InlineData("SMTP Configuration", "/admin/smtp")]
|
||||
public async Task AdminNavLinks_NavigateCorrectly(string linkText, string expectedPath)
|
||||
{
|
||||
var page = await _fixture.NewAuthenticatedPageAsync();
|
||||
await ClickNavAndWait(page, linkText, expectedPath);
|
||||
}
|
||||
|
||||
/// <summary>
/// For each Notifications sidebar entry, opens an authenticated page and hands the
/// link text and expected path to <c>ClickNavAndWait</c>.
/// </summary>
[Theory]
[InlineData("SMTP Configuration", "/notifications/smtp")]
[InlineData("Notification Lists", "/notifications/lists")]
[InlineData("Notification Report", "/notifications/report")]
[InlineData("Notification KPIs", "/notifications/kpis")]
public async Task NotificationsNavLinks_NavigateCorrectly(string linkText, string expectedPath)
{
    var authenticatedPage = await _fixture.NewAuthenticatedPageAsync();

    await ClickNavAndWait(authenticatedPage, linkText, expectedPath);
}
|
||||
|
||||
[Theory]
|
||||
[InlineData("Templates", "/design/templates")]
|
||||
[InlineData("Shared Scripts", "/design/shared-scripts")]
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
using System.Security.Claims;
|
||||
using Bunit;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.AspNetCore.Components;
|
||||
using Microsoft.AspNetCore.Components.Authorization;
|
||||
using Microsoft.AspNetCore.Components.Rendering;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using ScadaLink.Security;
|
||||
using NavMenu = ScadaLink.CentralUI.Components.Layout.NavMenu;
|
||||
|
||||
namespace ScadaLink.CentralUI.Tests.Layout;
|
||||
|
||||
/// <summary>
/// bUnit rendering tests for the sidebar <see cref="NavMenu"/>. They verify the
/// new Notifications section: its items are gated per-policy, and the old
/// <c>/admin/smtp</c> and <c>/monitoring/notification-outbox</c> routes are gone.
/// The <c>AuthorizeView Policy=...</c> blocks evaluate the real policies, which
/// require a claim of type <see cref="JwtTokenService.RoleClaimType"/> ("Role"),
/// so the test principal carries claims of that exact type.
/// </summary>
public class NavMenuTests : BunitContext
{
    /// <summary>
    /// Renders <see cref="NavMenu"/> under a principal holding the given roles.
    /// <see cref="NavMenu"/>'s top-level <c>AuthorizeView</c> requires the
    /// cascading <see cref="AuthenticationState"/>, so it is rendered inside a
    /// <see cref="CascadingAuthenticationState"/>; the real policies are
    /// registered so the per-item <c>AuthorizeView Policy=...</c> blocks are
    /// genuinely evaluated.
    /// </summary>
    /// <param name="roles">Role names added to the principal as role claims.</param>
    /// <returns>The rendered <see cref="NavMenu"/> found inside the cascading host.</returns>
    private IRenderedComponent<NavMenu> RenderWithRoles(params string[] roles)
    {
        var claims = new List<Claim> { new("Username", "tester") };
        claims.AddRange(roles.Select(r => new Claim(JwtTokenService.RoleClaimType, r)));

        var user = new ClaimsPrincipal(new ClaimsIdentity(claims, "TestAuth"));
        Services.AddSingleton<AuthenticationStateProvider>(new TestAuthStateProvider(user));
        Services.AddAuthorizationCore();
        AuthorizationPolicies.AddScadaLinkAuthorization(Services);
        // BunitContext pre-registers a placeholder IAuthorizationService that
        // throws when AuthorizeView evaluates a policy. Force the real service
        // so the per-item policy gating is genuinely exercised.
        Services.AddSingleton<IAuthorizationService, DefaultAuthorizationService>();

        var host = Render<CascadingAuthenticationState>(parameters => parameters
            .Add(p => p.ChildContent, (RenderFragment)(builder =>
            {
                builder.OpenComponent<NavMenu>(0);
                builder.CloseComponent();
            })));

        return host.FindComponent<NavMenu>();
    }

    /// <summary>A user holding every role sees all four Notifications links.</summary>
    [Fact]
    public void NotificationsSection_ShowsAllItems_ForMultiRoleUser()
    {
        var cut = RenderWithRoles("Admin", "Design", "Deployment");

        cut.WaitForAssertion(() =>
        {
            Assert.Contains("Notifications", cut.Markup);
            Assert.Contains("/notifications/smtp", cut.Markup);
            Assert.Contains("/notifications/lists", cut.Markup);
            Assert.Contains("/notifications/report", cut.Markup);
            Assert.Contains("/notifications/kpis", cut.Markup);
        });
    }

    /// <summary>An Admin-only user sees the SMTP link and none of the other three.</summary>
    [Fact]
    public void NotificationsSection_AdminOnlyUser_SeesOnlySmtp()
    {
        var cut = RenderWithRoles("Admin");

        cut.WaitForAssertion(() =>
        {
            Assert.Contains("/notifications/smtp", cut.Markup);
            Assert.DoesNotContain("/notifications/report", cut.Markup);
            Assert.DoesNotContain("/notifications/lists", cut.Markup);
            Assert.DoesNotContain("/notifications/kpis", cut.Markup);
        });
    }

    /// <summary>
    /// The retired <c>/admin/smtp</c> and <c>/monitoring/notification-outbox</c>
    /// links no longer appear, even for a user holding every role.
    /// </summary>
    [Fact]
    public void OldRoutes_AreNoLongerLinked()
    {
        var cut = RenderWithRoles("Admin", "Design", "Deployment");

        cut.WaitForAssertion(() =>
        {
            // Positive anchor: negative-only assertions would also pass against an
            // empty or not-yet-authorized render. Requiring a link this same role
            // set is expected to see proves the menu content is actually present
            // before the absence checks are trusted.
            Assert.Contains("/notifications/smtp", cut.Markup);

            Assert.DoesNotContain("/admin/smtp", cut.Markup);
            Assert.DoesNotContain("/monitoring/notification-outbox", cut.Markup);
        });
    }
}
|
||||
@@ -0,0 +1,132 @@
|
||||
using System.Security.Claims;
|
||||
using Akka.Actor;
|
||||
using Bunit;
|
||||
using Microsoft.AspNetCore.Components.Authorization;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Communication;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using HealthPage = ScadaLink.CentralUI.Components.Pages.Monitoring.Health;
|
||||
|
||||
namespace ScadaLink.CentralUI.Tests.Pages;
|
||||
|
||||
/// <summary>
/// bUnit rendering tests for the Notification Outbox KPI tile row on the Health
/// Monitoring dashboard (Task 24).
///
/// <see cref="ICentralHealthAggregator"/> is replaced with an NSubstitute stand-in.
/// <see cref="CommunicationService"/> is a concrete class, so its outbox calls are
/// answered by a scripted <see cref="IActorRef"/> attached through
/// <c>SetNotificationOutbox</c> — the same seam <c>NotificationReportPageTests</c> uses.
/// </summary>
public class HealthPageTests : BunitContext
{
    private readonly ActorSystem _system = ActorSystem.Create("health-page-tests");
    private readonly CommunicationService _comms;

    // Reply the scripted actor hands back; a test may swap it before rendering.
    private NotificationKpiResponse _kpiReply = new NotificationKpiResponse(
        "k", true, null, QueueDepth: 12, StuckCount: 4, ParkedCount: 3,
        DeliveredLastInterval: 88, OldestPendingAge: TimeSpan.FromMinutes(6));

    /// <summary>
    /// Registers the communication service (backed by the scripted outbox actor),
    /// an empty health aggregator, an empty site repository and an "Admin" test user.
    /// </summary>
    public HealthPageTests()
    {
        var communicationOptions = Options.Create(new CommunicationOptions());
        _comms = new CommunicationService(
            communicationOptions,
            NullLogger<CommunicationService>.Instance);

        var scriptedOutbox = _system.ActorOf(Props.Create(() => new ScriptedOutboxActor(this)));
        _comms.SetNotificationOutbox(scriptedOutbox);
        Services.AddSingleton(_comms);

        var healthAggregator = Substitute.For<ICentralHealthAggregator>();
        healthAggregator.GetAllSiteStates()
            .Returns(new Dictionary<string, SiteHealthState>());
        Services.AddSingleton(healthAggregator);

        var siteRepository = Substitute.For<ISiteRepository>();
        siteRepository.GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromResult<IReadOnlyList<Site>>(new List<Site>()));
        Services.AddSingleton(siteRepository);

        var identity = new ClaimsIdentity(
            new[]
            {
                new Claim("Username", "tester"),
                new Claim(ClaimTypes.Role, "Admin"),
            },
            "TestAuth");
        var principal = new ClaimsPrincipal(identity);
        Services.AddSingleton<AuthenticationStateProvider>(new TestAuthStateProvider(principal));
        Services.AddAuthorizationCore();
    }

    /// <summary>The tile row shows its labels and the scripted KPI numbers.</summary>
    [Fact]
    public void Renders_OutboxKpiTiles_WithValues()
    {
        var cut = Render<HealthPage>();

        // The KPI reply comes back from the actor asynchronously, hence the wait.
        cut.WaitForAssertion(() =>
        {
            var markup = cut.Markup;

            Assert.Contains("Notification Outbox", markup);
            Assert.Contains("Queue Depth", markup);
            Assert.Contains("Stuck", markup);
            Assert.Contains("Parked", markup);

            // Scripted values: QueueDepth, StuckCount, ParkedCount.
            Assert.Contains(">12<", markup);
            Assert.Contains(">4<", markup);
            Assert.Contains(">3<", markup);
        });
    }

    /// <summary>The dashboard links to the Notification KPIs page.</summary>
    [Fact]
    public void RendersLinkToTheNotificationKpisPage()
    {
        var cut = Render<HealthPage>();

        var detailsLink = cut.Find("a[href='/notifications/kpis']");

        Assert.Contains("View details", detailsLink.TextContent);
    }

    /// <summary>A failed KPI reply leaves the section rendered with dash placeholders.</summary>
    [Fact]
    public void OutboxKpiFailure_ShowsGracefulFallback()
    {
        _kpiReply = new NotificationKpiResponse(
            "k", false, "outbox repository unavailable", 0, 0, 0, 0, null);

        var cut = Render<HealthPage>();

        cut.WaitForAssertion(() =>
        {
            var markup = cut.Markup;

            // The section still renders; values are replaced by an em dash.
            Assert.Contains("Notification Outbox", markup);
            Assert.Contains("Queue Depth", markup);
            Assert.Contains(">—<", markup);
        });
    }

    /// <summary>Shuts the test actor system down (bounded to five seconds) before base disposal.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            var shutdown = _system.Terminate();
            shutdown.Wait(TimeSpan.FromSeconds(5));
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Notification-outbox stand-in: answers every KPI request with whatever
    /// response the owning test currently has scripted.
    /// </summary>
    private sealed class ScriptedOutboxActor : ReceiveActor
    {
        public ScriptedOutboxActor(HealthPageTests test)
        {
            Receive<NotificationKpiRequest>(request =>
            {
                Sender.Tell(test._kpiReply);
            });
        }
    }
}
|
||||
@@ -0,0 +1,166 @@
|
||||
using System.Security.Claims;
|
||||
using Akka.Actor;
|
||||
using Bunit;
|
||||
using Microsoft.AspNetCore.Components.Authorization;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
using ScadaLink.Communication;
|
||||
using ScadaLink.Security;
|
||||
using NotificationKpisPage = ScadaLink.CentralUI.Components.Pages.Notifications.NotificationKpis;
|
||||
|
||||
namespace ScadaLink.CentralUI.Tests.Pages;
|
||||
|
||||
/// <summary>
/// bUnit rendering tests for the Notification KPIs page.
///
/// <see cref="CommunicationService"/> is concrete with non-virtual methods, so it
/// cannot be substituted. Its global and per-site KPI calls go through an injected
/// <see cref="IActorRef"/>, so these tests run a small real <see cref="ActorSystem"/>
/// whose scripted <see cref="ReceiveActor"/> answers both
/// <see cref="NotificationKpiRequest"/> and <see cref="PerSiteNotificationKpiRequest"/>,
/// attached via <c>SetNotificationOutbox</c>.
/// </summary>
public class NotificationKpisPageTests : BunitContext
{
    private readonly ActorSystem _system = ActorSystem.Create("notif-kpis-tests");
    private readonly CommunicationService _comms;

    // Scripted replies; a test may replace either one before rendering.
    private NotificationKpiResponse _kpiReply = new NotificationKpiResponse(
        "k", true, null, QueueDepth: 7, StuckCount: 2, ParkedCount: 1,
        DeliveredLastInterval: 42, OldestPendingAge: TimeSpan.FromMinutes(9));

    private PerSiteNotificationKpiResponse _perSiteReply = new PerSiteNotificationKpiResponse(
        "p",
        true,
        null,
        new List<SiteNotificationKpiSnapshot>
        {
            new SiteNotificationKpiSnapshot(
                "plant-a", QueueDepth: 4, StuckCount: 1, ParkedCount: 0,
                DeliveredLastInterval: 9, OldestPendingAge: TimeSpan.FromMinutes(7)),
        });

    /// <summary>
    /// Registers the communication service (backed by the scripted outbox actor),
    /// a two-site repository and a "Deployment" test user.
    /// </summary>
    public NotificationKpisPageTests()
    {
        var communicationOptions = Options.Create(new CommunicationOptions());
        _comms = new CommunicationService(
            communicationOptions,
            NullLogger<CommunicationService>.Instance);

        var scriptedOutbox = _system.ActorOf(Props.Create(() => new ScriptedOutboxActor(this)));
        _comms.SetNotificationOutbox(scriptedOutbox);

        Services.AddSingleton(_comms);

        IReadOnlyList<Site> knownSites = new List<Site>
        {
            new Site("Plant A", "plant-a") { Id = 1 },
            new Site("Plant B", "plant-b") { Id = 2 },
        };
        var siteRepository = Substitute.For<ISiteRepository>();
        siteRepository.GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromResult(knownSites));
        Services.AddSingleton(siteRepository);

        var identity = new ClaimsIdentity(
            new[]
            {
                new Claim("Username", "tester"),
                new Claim(ClaimTypes.Role, "Deployment"),
            },
            "TestAuth");
        var principal = new ClaimsPrincipal(identity);
        Services.AddSingleton<AuthenticationStateProvider>(new TestAuthStateProvider(principal));
        Services.AddAuthorizationCore();
    }

    /// <summary>The page type carries an authorize attribute naming the Deployment policy.</summary>
    [Fact]
    public void Page_RequiresDeploymentPolicy()
    {
        var authorize = typeof(NotificationKpisPage)
            .GetCustomAttributes(typeof(AuthorizeAttribute), true)
            .OfType<AuthorizeAttribute>()
            .FirstOrDefault();

        Assert.NotNull(authorize);
        Assert.Equal(AuthorizationPolicies.RequireDeployment, authorize!.Policy);
    }

    /// <summary>Global tiles and the per-site row both render from the scripted replies.</summary>
    [Fact]
    public void RendersGlobalTilesAndPerSiteRows()
    {
        var cut = Render<NotificationKpisPage>();

        cut.WaitForAssertion(() =>
        {
            var markup = cut.Markup;

            Assert.Contains("Queue Depth", markup);
            // Scripted global QueueDepth.
            Assert.Contains("7", markup);
            // The per-site identifier "plant-a" is shown by its friendly name.
            Assert.Contains("Plant A", markup);
        });
    }

    /// <summary>A failed global KPI reply surfaces its error text.</summary>
    [Fact]
    public void ShowsKpiError_WhenGlobalKpiQueryFails()
    {
        _kpiReply = new NotificationKpiResponse(
            "k", false, "kpi down", 0, 0, 0, 0, null);

        var cut = Render<NotificationKpisPage>();

        cut.WaitForAssertion(() =>
        {
            Assert.Contains("kpi down", cut.Markup);
        });
    }

    /// <summary>A failed per-site reply shows only the per-site alert, not the global one.</summary>
    [Fact]
    public void ShowsPerSiteError_WhenPerSiteKpiQueryFails()
    {
        // The global KPI reply is left at its successful default.
        _perSiteReply = new PerSiteNotificationKpiResponse(
            "p", false, "per-site down", new List<SiteNotificationKpiSnapshot>());

        var cut = Render<NotificationKpisPage>();

        cut.WaitForAssertion(() =>
        {
            var markup = cut.Markup;

            Assert.Contains("Per-site KPIs unavailable: per-site down", markup);
            // The global alert opens with ">KPIs unavailable:" (no "Per-site "
            // prefix); it must be absent because only the per-site path failed.
            Assert.DoesNotContain(">KPIs unavailable:", markup);
        });
    }

    /// <summary>A successful but empty per-site reply shows the empty-state text.</summary>
    [Fact]
    public void ShowsPerSiteEmptyState_WhenNoSites()
    {
        _perSiteReply = new PerSiteNotificationKpiResponse(
            "p", true, null, new List<SiteNotificationKpiSnapshot>());

        var cut = Render<NotificationKpisPage>();

        cut.WaitForAssertion(() =>
        {
            Assert.Contains("No per-site activity", cut.Markup);
        });
    }

    /// <summary>Shuts the test actor system down (bounded to five seconds) before base disposal.</summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            var shutdown = _system.Terminate();
            shutdown.Wait(TimeSpan.FromSeconds(5));
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Notification-outbox stand-in: answers each KPI message type with whatever
    /// response the owning test currently has scripted.
    /// </summary>
    private sealed class ScriptedOutboxActor : ReceiveActor
    {
        public ScriptedOutboxActor(NotificationKpisPageTests test)
        {
            Receive<NotificationKpiRequest>(request =>
            {
                Sender.Tell(test._kpiReply);
            });
            Receive<PerSiteNotificationKpiRequest>(request =>
            {
                Sender.Tell(test._perSiteReply);
            });
        }
    }
}
|
||||
@@ -0,0 +1,110 @@
|
||||
using System.Security.Claims;
|
||||
using Bunit;
|
||||
using Microsoft.AspNetCore.Components.Authorization;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using NSubstitute;
|
||||
using ScadaLink.CentralUI.Components.Shared;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using NotificationListsPage = ScadaLink.CentralUI.Components.Pages.Notifications.NotificationLists;
|
||||
|
||||
namespace ScadaLink.CentralUI.Tests.Pages;
|
||||
|
||||
/// <summary>
/// bUnit rendering tests for the standalone Notification Lists page (Task 7).
/// Each test substitutes <see cref="INotificationRepository"/> and renders the page
/// under a "Design" test user with an auto-confirming dialog service.
/// </summary>
public class NotificationListsPageTests : BunitContext
{
    /// <summary>
    /// Registers the auto-confirming dialog service, a "Design" test principal and
    /// the core authorization services.
    /// </summary>
    private void WireAuthAndDialog()
    {
        Services.AddSingleton<IDialogService>(new AlwaysConfirmDialogService());

        var identity = new ClaimsIdentity(
            new[]
            {
                new Claim("Username", "tester"),
                new Claim(ClaimTypes.Role, "Design"),
            },
            "TestAuth");
        var principal = new ClaimsPrincipal(identity);
        Services.AddSingleton<AuthenticationStateProvider>(new TestAuthStateProvider(principal));
        Services.AddAuthorizationCore();
    }

    /// <summary>A list and its recipient address both appear in the rendered markup.</summary>
    [Fact]
    public void RendersNotificationListRows()
    {
        IReadOnlyList<NotificationList> lists = new List<NotificationList>
        {
            new NotificationList("Ops On-Call") { Id = 1 },
        };
        IReadOnlyList<NotificationRecipient> recipients = new List<NotificationRecipient>
        {
            new NotificationRecipient("Jane", "jane@example.com"),
        };

        var notifications = Substitute.For<INotificationRepository>();
        notifications.GetAllNotificationListsAsync().Returns(Task.FromResult(lists));
        notifications.GetRecipientsByListIdAsync(1).Returns(Task.FromResult(recipients));
        Services.AddSingleton(notifications);
        WireAuthAndDialog();

        var cut = Render<NotificationListsPage>();

        cut.WaitForAssertion(() =>
        {
            var markup = cut.Markup;

            Assert.Contains("Ops On-Call", markup);
            Assert.Contains("jane@example.com", markup);
        });
    }

    /// <summary>With no lists returned, the empty-state text is rendered.</summary>
    [Fact]
    public void ShowsEmptyState_WhenNoLists()
    {
        IReadOnlyList<NotificationList> noLists = new List<NotificationList>();

        var notifications = Substitute.For<INotificationRepository>();
        notifications.GetAllNotificationListsAsync().Returns(Task.FromResult(noLists));
        Services.AddSingleton(notifications);
        WireAuthAndDialog();

        var cut = Render<NotificationListsPage>();

        cut.WaitForAssertion(() =>
        {
            Assert.Contains("No notification lists", cut.Markup);
        });
    }

    /// <summary>
    /// Clicking Delete (auto-confirmed) deletes list 1, saves, and queries the
    /// lists a second time.
    /// </summary>
    [Fact]
    public void DeleteList_ConfirmsThenDeletesAndReloads()
    {
        IReadOnlyList<NotificationList> lists = new List<NotificationList>
        {
            new NotificationList("Ops On-Call") { Id = 1 },
        };
        IReadOnlyList<NotificationRecipient> noRecipients = new List<NotificationRecipient>();

        var notifications = Substitute.For<INotificationRepository>();
        notifications.GetAllNotificationListsAsync().Returns(Task.FromResult(lists));
        notifications.GetRecipientsByListIdAsync(1).Returns(Task.FromResult(noRecipients));
        Services.AddSingleton(notifications);
        WireAuthAndDialog();

        var cut = Render<NotificationListsPage>();

        cut.WaitForState(() => cut.Markup.Contains("Ops On-Call"));

        cut.FindAll("tbody tr button")
            .First(b => b.TextContent.Contains("Delete"))
            .Click();

        cut.WaitForAssertion(() =>
        {
            notifications.Received().DeleteNotificationListAsync(1);
            notifications.Received().SaveChangesAsync();
            // Two list queries in total: the initial load plus the post-delete reload.
            notifications.Received(2).GetAllNotificationListsAsync();
        });
    }

    /// <summary>Dialog service that confirms every prompt so action paths run end-to-end.</summary>
    private sealed class AlwaysConfirmDialogService : IDialogService
    {
        /// <summary>Always answers the confirmation with <c>true</c>.</summary>
        public Task<bool> ConfirmAsync(string title, string message, bool danger = false)
        {
            return Task.FromResult(true);
        }

        /// <summary>Always answers the prompt with <c>null</c>.</summary>
        public Task<string?> PromptAsync(
            string title, string label, string initialValue = "", string? placeholder = null)
        {
            return Task.FromResult<string?>(null);
        }
    }
}
|
||||
@@ -0,0 +1,221 @@
|
||||
using System.Security.Claims;
|
||||
using Akka.Actor;
|
||||
using Bunit;
|
||||
using Microsoft.AspNetCore.Components.Authorization;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.CentralUI.Components.Shared;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Entities.Sites;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Communication;
|
||||
using ScadaLink.Security;
|
||||
using NotificationReportPage = ScadaLink.CentralUI.Components.Pages.Notifications.NotificationReport;
|
||||
|
||||
namespace ScadaLink.CentralUI.Tests.Pages;
|
||||
|
||||
/// <summary>
|
||||
/// bUnit rendering tests for the Notification Report page.
|
||||
///
|
||||
/// Testability note: <see cref="CommunicationService"/> is a concrete class with
|
||||
/// non-virtual methods, so NSubstitute cannot intercept it. The report calls all
|
||||
/// route through an injected <see cref="IActorRef"/> (the notification-outbox
|
||||
/// proxy), so the tests wire a real, lightweight <see cref="ActorSystem"/> with a
|
||||
/// scripted <see cref="ReceiveActor"/> that replies with fixed responses — the
|
||||
/// same seam <c>SetNotificationOutbox</c> exists for.
|
||||
/// </summary>
|
||||
public class NotificationReportPageTests : BunitContext
|
||||
{
|
||||
private readonly ActorSystem _system = ActorSystem.Create("notif-report-tests");
private readonly CommunicationService _comms;

// Scripted query reply; a test may replace it before rendering. The default holds
// one parked, stuck row and one delivered row.
private NotificationOutboxQueryResponse _queryReply = new NotificationOutboxQueryResponse(
    "q",
    true,
    null,
    new List<NotificationSummary>
    {
        new NotificationSummary(
            "notif-aaaaaaaa-1111", "Email", "Ops On-Call", "Pump fault at Plant-A",
            "Parked", RetryCount: 3, LastError: "SMTP timeout", SourceSiteId: "plant-a",
            SourceInstanceId: "Pump-001", CreatedAt: DateTimeOffset.UtcNow.AddMinutes(-30),
            DeliveredAt: null, IsStuck: true),
        new NotificationSummary(
            "notif-bbbbbbbb-2222", "Email", "Maintenance", "Daily summary",
            "Delivered", RetryCount: 0, LastError: null, SourceSiteId: "plant-b",
            SourceInstanceId: null, CreatedAt: DateTimeOffset.UtcNow.AddHours(-2),
            DeliveredAt: DateTimeOffset.UtcNow.AddHours(-2), IsStuck: false),
    },
    TotalCount: 2);

// Retry/discard requests captured by the scripted actor, in arrival order.
private readonly List<RetryNotificationRequest> _retryRequests = new List<RetryNotificationRequest>();
private readonly List<DiscardNotificationRequest> _discardRequests = new List<DiscardNotificationRequest>();
|
||||
|
||||
public NotificationReportPageTests()
{
    // Communication service whose outbox calls are answered by the scripted actor.
    var communicationOptions = Options.Create(new CommunicationOptions());
    _comms = new CommunicationService(
        communicationOptions,
        NullLogger<CommunicationService>.Instance);

    var scriptedOutbox = _system.ActorOf(Props.Create(() => new ScriptedOutboxActor(this)));
    _comms.SetNotificationOutbox(scriptedOutbox);

    Services.AddSingleton(_comms);
    Services.AddSingleton<IDialogService>(new AlwaysConfirmDialogService());

    // Site repository returning the two sites referenced by the scripted rows.
    IReadOnlyList<Site> knownSites = new List<Site>
    {
        new Site("Plant A", "plant-a") { Id = 1 },
        new Site("Plant B", "plant-b") { Id = 2 },
    };
    var siteRepository = Substitute.For<ISiteRepository>();
    siteRepository.GetAllSitesAsync(Arg.Any<CancellationToken>())
        .Returns(Task.FromResult(knownSites));
    Services.AddSingleton(siteRepository);

    // Authenticated "Deployment" test user.
    var identity = new ClaimsIdentity(
        new[]
        {
            new Claim("Username", "tester"),
            new Claim(ClaimTypes.Role, "Deployment"),
        },
        "TestAuth");
    var principal = new ClaimsPrincipal(identity);
    Services.AddSingleton<AuthenticationStateProvider>(new TestAuthStateProvider(principal));
    Services.AddAuthorizationCore();
}
|
||||
|
||||
/// <summary>The page type carries an authorize attribute naming the Deployment policy.</summary>
[Fact]
public void Page_RequiresDeploymentPolicy()
{
    var authorize = typeof(NotificationReportPage)
        .GetCustomAttributes(typeof(AuthorizeAttribute), true)
        .OfType<AuthorizeAttribute>()
        .FirstOrDefault();

    Assert.NotNull(authorize);
    Assert.Equal(AuthorizationPolicies.RequireDeployment, authorize!.Policy);
}
|
||||
|
||||
/// <summary>Both scripted subjects and a list name appear in the rendered markup.</summary>
[Fact]
public void Renders_NotificationRows()
{
    var cut = Render<NotificationReportPage>();

    cut.WaitForAssertion(() =>
    {
        var markup = cut.Markup;

        Assert.Contains("Pump fault at Plant-A", markup);
        Assert.Contains("Daily summary", markup);
        Assert.Contains("Ops On-Call", markup);
    });
}
|
||||
|
||||
/// <summary>The row scripted with <c>IsStuck: true</c> contains a badge reading "Stuck".</summary>
[Fact]
public void StuckRow_IsBadged()
{
    var cut = Render<NotificationReportPage>();

    cut.WaitForAssertion(() =>
    {
        var tableRows = cut.FindAll("tbody tr");
        var pumpFaultRow = tableRows
            .First(r => r.TextContent.Contains("Pump fault at Plant-A"));

        // Badge markup is present and its visible text says "Stuck".
        Assert.Contains("badge", pumpFaultRow.InnerHtml);
        Assert.Contains("Stuck", pumpFaultRow.TextContent);
    });
}
|
||||
|
||||
/// <summary>
/// Clicking Retry on the parked row sends exactly one retry request carrying
/// that row's notification id to the scripted actor.
/// </summary>
[Fact]
public void ClickRetry_OnParkedRow_CallsRetryNotification()
{
    var cut = Render<NotificationReportPage>();

    // Wait until the scripted rows have been rendered.
    cut.WaitForState(() => cut.Markup.Contains("Pump fault at Plant-A"));

    var tableRows = cut.FindAll("tbody tr");
    var pumpFaultRow = tableRows
        .First(r => r.TextContent.Contains("Pump fault at Plant-A"));

    pumpFaultRow.QuerySelectorAll("button")
        .First(b => b.TextContent.Contains("Retry"))
        .Click();

    cut.WaitForAssertion(() =>
    {
        var captured = Assert.Single(_retryRequests);
        Assert.Equal("notif-aaaaaaaa-1111", captured.NotificationId);
    });
}
|
||||
|
||||
/// <summary>
/// Clicking Discard on the parked row sends exactly one discard request carrying
/// that row's notification id to the scripted actor.
/// </summary>
[Fact]
public void ClickDiscard_OnParkedRow_CallsDiscardNotification()
{
    var cut = Render<NotificationReportPage>();

    // Wait until the scripted rows have been rendered.
    cut.WaitForState(() => cut.Markup.Contains("Pump fault at Plant-A"));

    var tableRows = cut.FindAll("tbody tr");
    var pumpFaultRow = tableRows
        .First(r => r.TextContent.Contains("Pump fault at Plant-A"));

    pumpFaultRow.QuerySelectorAll("button")
        .First(b => b.TextContent.Contains("Discard"))
        .Click();

    cut.WaitForAssertion(() =>
    {
        var captured = Assert.Single(_discardRequests);
        Assert.Equal("notif-aaaaaaaa-1111", captured.NotificationId);
    });
}
|
||||
|
||||
/// <summary>
/// When the scripted query reply reports failure, its error text appears in the page markup.
/// </summary>
[Fact]
public void QueryFailure_ShowsErrorMessage()
{
    const string errorText = "outbox query backend unavailable";
    var noRows = new List<NotificationSummary>();
    _queryReply = new NotificationOutboxQueryResponse("q", false, errorText, noRows, TotalCount: 0);

    var page = Render<NotificationReportPage>();

    page.WaitForAssertion(() => Assert.Contains(errorText, page.Markup));
}
|
||||
|
||||
/// <summary>
/// On a managed dispose, terminates <c>_system</c> and waits up to five seconds for that to
/// finish; the base class dispose always runs afterwards.
/// </summary>
/// <param name="disposing">True when called from an explicit dispose.</param>
protected override void Dispose(bool disposing)
{
    if (disposing)
    {
        var shutdownBudget = TimeSpan.FromSeconds(5);
        var termination = _system.Terminate();
        termination.Wait(shutdownBudget);
    }

    base.Dispose(disposing);
}
|
||||
|
||||
/// <summary>
/// Test double for the notification-outbox actor. Query requests are answered with the
/// owning test's current <c>_queryReply</c>; retry and discard requests are recorded on the
/// test and answered with a successful response echoing the request's correlation id.
/// </summary>
private sealed class ScriptedOutboxActor : ReceiveActor
{
    private readonly NotificationReportPageTests _test;

    public ScriptedOutboxActor(NotificationReportPageTests test)
    {
        _test = test;

        Receive<NotificationOutboxQueryRequest>(_ => ReplyToQuery());
        Receive<RetryNotificationRequest>(request => RecordRetry(request));
        Receive<DiscardNotificationRequest>(request => RecordDiscard(request));
    }

    // The reply is read when the message arrives, so a test can re-script it before rendering.
    private void ReplyToQuery()
    {
        Sender.Tell(_test._queryReply);
    }

    private void RecordRetry(RetryNotificationRequest request)
    {
        _test._retryRequests.Add(request);
        Sender.Tell(new RetryNotificationResponse(request.CorrelationId, true, null));
    }

    private void RecordDiscard(DiscardNotificationRequest request)
    {
        _test._discardRequests.Add(request);
        Sender.Tell(new DiscardNotificationResponse(request.CorrelationId, true, null));
    }
}
|
||||
|
||||
/// <summary>
/// Dialog service stub: every confirmation resolves to <c>true</c> and every prompt
/// resolves to <c>null</c>, so confirm-gated action paths run end-to-end.
/// </summary>
private sealed class AlwaysConfirmDialogService : IDialogService
{
    public Task<bool> ConfirmAsync(string title, string message, bool danger = false)
    {
        return Task.FromResult(true);
    }

    public Task<string?> PromptAsync(
        string title, string label, string initialValue = "", string? placeholder = null)
    {
        string? noInput = null;
        return Task.FromResult(noInput);
    }
}
|
||||
}
|
||||
@@ -466,6 +466,42 @@ public class ScriptAnalysisServiceTests
|
||||
Assert.Equal("42", result.ReturnValueJson);
|
||||
}
|
||||
|
||||
/// <summary>
/// A script using the Notify outbox shape (Send returning an id, Status taking that id)
/// must produce no compiler ("CS…") and no "SCADA…" diagnostic markers.
/// </summary>
[Fact]
public void NotifyOutboxShape_DiagnosesClean()
{
    // Notification Outbox: the sandbox Notify surface must be
    // signature-faithful to production NotifyHelper/NotifyTarget —
    // Send returns Task<string> (a NotificationId) and Status takes that
    // id. A script using the new shape must compile clean in the sandbox,
    // exactly as it would against the real site runtime.
    var resp = _svc.Diagnose(new DiagnoseRequest(
        "var id = await Notify.To(\"ops\").Send(\"subj\", \"body\"); " +
        "var st = await Notify.Status(id); " +
        "return st.Status;"));

    // Diagnostic codes are machine identifiers, so match the prefixes ordinally rather
    // than with the current culture's comparison rules.
    Assert.DoesNotContain(resp.Markers, m => m.Code.StartsWith("CS", System.StringComparison.Ordinal));
    Assert.DoesNotContain(resp.Markers, m => m.Code.StartsWith("SCADA", System.StringComparison.Ordinal));
}
|
||||
|
||||
/// <summary>
/// Running the Notify outbox-shaped script through the sandbox succeeds and returns the
/// JSON string "Unknown".
/// </summary>
[Fact]
public async Task RunInSandbox_NotifyOutboxShape_StillRuns()
{
    // The script sends a notification, looks its status up by the returned id and
    // returns that status.
    const string script =
        "var id = await Notify.To(\"ops\").Send(\"subj\", \"body\"); " +
        "var st = await Notify.Status(id); " +
        "return st.Status;";
    var request = new SandboxRunRequest(script, Parameters: null, TimeoutSeconds: null);

    var outcome = await _svc.RunInSandboxAsync(request, CancellationToken.None);

    Assert.True(outcome.Success);
    Assert.Equal("\"Unknown\"", outcome.ReturnValueJson);
}
|
||||
|
||||
[Fact]
|
||||
public async Task RunInSandbox_CapturesConsoleOutput()
|
||||
{
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
|
||||
namespace ScadaLink.Commons.Tests.Entities;
|
||||
|
||||
/// <summary>
/// Tests for the <see cref="Notification"/> outbox entity: the values a freshly constructed
/// instance exposes, and the <see cref="ArgumentNullException"/> raised when any required
/// string argument is null.
/// </summary>
public class NotificationEntityTests
{
    [Fact]
    public void Constructor_SetsDefaults()
    {
        var notification = new Notification(
            "id-1", NotificationType.Email, "ops-team", "subj", "body", "SiteA");

        // Values passed straight through from the constructor arguments.
        Assert.Equal("id-1", notification.NotificationId);
        Assert.Equal(NotificationType.Email, notification.Type);
        Assert.Equal("ops-team", notification.ListName);
        Assert.Equal("SiteA", notification.SourceSiteId);

        // Defaults the constructor is expected to establish.
        Assert.Equal(NotificationStatus.Pending, notification.Status);
        Assert.Equal(0, notification.RetryCount);
    }

    [Fact]
    public void Constructor_NullArguments_Throw()
    {
        // Builds an Email notification, letting each call null out exactly one argument.
        static Notification Create(string? id, string? list, string? subject, string? body, string? site)
        {
            return new Notification(id!, NotificationType.Email, list!, subject!, body!, site!);
        }

        Assert.Throws<ArgumentNullException>(() => Create(null, "list", "s", "b", "SiteA"));
        Assert.Throws<ArgumentNullException>(() => Create("id", null, "s", "b", "SiteA"));
        Assert.Throws<ArgumentNullException>(() => Create("id", "list", null, "b", "SiteA"));
        Assert.Throws<ArgumentNullException>(() => Create("id", "list", "s", null, "SiteA"));
        Assert.Throws<ArgumentNullException>(() => Create("id", "list", "s", "b", null));
    }
}
|
||||
@@ -0,0 +1,213 @@
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.Commons.Tests.Messages;
|
||||
|
||||
/// <summary>
/// Notification Outbox: checks that the site/central notification messages and the outbox
/// UI query/action messages expose the values they were constructed with, and that
/// equality and <c>with</c> expressions behave as the tests below expect.
/// </summary>
public class NotificationMessagesTests
{
    // ── Task 7: site/central notification message contracts ──

    [Fact]
    public void NotificationSubmit_PositionalConstruction_SetsAllFields()
    {
        var timestamp = DateTimeOffset.UtcNow;

        var submit = new NotificationSubmit(
            "notif-1", "Operators", "Tank overflow", "Tank 3 has overflowed.",
            "site-01", "inst-7", "OnAlarm", timestamp);

        Assert.Equal("notif-1", submit.NotificationId);
        Assert.Equal("Operators", submit.ListName);
        Assert.Equal("Tank overflow", submit.Subject);
        Assert.Equal("Tank 3 has overflowed.", submit.Body);
        Assert.Equal("site-01", submit.SourceSiteId);
        Assert.Equal("inst-7", submit.SourceInstanceId);
        Assert.Equal("OnAlarm", submit.SourceScript);
        Assert.Equal(timestamp, submit.SiteEnqueuedAt);
    }

    [Fact]
    public void NotificationSubmit_AllowsNullOptionalSourceFields()
    {
        var submit = new NotificationSubmit(
            "notif-2", "Operators", "Subject", "Body",
            "site-01", null, null, DateTimeOffset.UtcNow);

        Assert.Null(submit.SourceInstanceId);
        Assert.Null(submit.SourceScript);
    }

    [Fact]
    public void NotificationSubmit_ValueEquality_EqualWhenAllFieldsMatch()
    {
        var timestamp = DateTimeOffset.UtcNow;

        // Two separately constructed instances built from identical arguments.
        NotificationSubmit Build()
        {
            return new NotificationSubmit("n", "L", "S", "B", "site", "inst", "scr", timestamp);
        }

        var first = Build();
        var second = Build();

        Assert.Equal(first, second);
        Assert.Equal(first.GetHashCode(), second.GetHashCode());
    }

    [Fact]
    public void NotificationSubmitAck_WithExpression_ChangesSingleField()
    {
        var accepted = new NotificationSubmitAck("notif-1", true, null);

        var rejected = accepted with { Accepted = false, Error = "duplicate" };

        // The source instance is untouched; the copy carries the overrides.
        Assert.True(accepted.Accepted);
        Assert.False(rejected.Accepted);
        Assert.Equal("duplicate", rejected.Error);
        Assert.Equal("notif-1", rejected.NotificationId);
        Assert.NotEqual(accepted, rejected);
    }

    [Fact]
    public void NotificationStatusQuery_PositionalConstruction_SetsAllFields()
    {
        var query = new NotificationStatusQuery("corr-1", "notif-9");

        Assert.Equal("corr-1", query.CorrelationId);
        Assert.Equal("notif-9", query.NotificationId);
    }

    [Fact]
    public void NotificationStatusResponse_PositionalConstruction_SetsAllFields()
    {
        var deliveredAt = DateTimeOffset.UtcNow;

        var response = new NotificationStatusResponse(
            "corr-1", true, "Delivered", 2, null, deliveredAt);

        Assert.Equal("corr-1", response.CorrelationId);
        Assert.True(response.Found);
        Assert.Equal("Delivered", response.Status);
        Assert.Equal(2, response.RetryCount);
        Assert.Null(response.LastError);
        Assert.Equal(deliveredAt, response.DeliveredAt);
    }

    // ── Task 8: outbox UI query/action contracts ──

    [Fact]
    public void NotificationOutboxQueryRequest_PositionalConstruction_SetsAllFields()
    {
        var windowStart = DateTimeOffset.UtcNow.AddDays(-1);
        var windowEnd = DateTimeOffset.UtcNow;

        var request = new NotificationOutboxQueryRequest(
            "corr-1", "Stuck", "Email", "site-01", "Operators", true, "overflow",
            windowStart, windowEnd, 2, 50);

        Assert.Equal("corr-1", request.CorrelationId);
        Assert.Equal("Stuck", request.StatusFilter);
        Assert.Equal("Email", request.TypeFilter);
        Assert.Equal("site-01", request.SourceSiteFilter);
        Assert.Equal("Operators", request.ListNameFilter);
        Assert.True(request.StuckOnly);
        Assert.Equal("overflow", request.SubjectKeyword);
        Assert.Equal(windowStart, request.From);
        Assert.Equal(windowEnd, request.To);
        Assert.Equal(2, request.PageNumber);
        Assert.Equal(50, request.PageSize);
    }

    [Fact]
    public void NotificationSummary_ValueEquality_EqualWhenAllFieldsMatch()
    {
        var createdAt = DateTimeOffset.UtcNow;

        // Two separately constructed instances built from identical arguments.
        NotificationSummary Build()
        {
            return new NotificationSummary(
                "n", "Email", "Ops", "S", "Pending", 1, null, "site-01", "inst-1",
                createdAt, null, false);
        }

        var first = Build();
        var second = Build();

        Assert.Equal(first, second);
        Assert.Equal(first.GetHashCode(), second.GetHashCode());
    }

    [Fact]
    public void NotificationOutboxQueryResponse_PositionalConstruction_SetsAllFields()
    {
        var row = new NotificationSummary(
            "n", "Email", "Ops", "S", "Delivered", 0, null, "site-01", null,
            DateTimeOffset.UtcNow, DateTimeOffset.UtcNow, false);

        var response = new NotificationOutboxQueryResponse(
            "corr-1", true, null, new[] { row }, 1);

        Assert.Equal("corr-1", response.CorrelationId);
        Assert.True(response.Success);
        Assert.Null(response.ErrorMessage);
        Assert.Single(response.Notifications);
        Assert.Equal(1, response.TotalCount);
    }

    [Fact]
    public void RetryNotificationRequestAndResponse_RoundTripFields()
    {
        var retry = new RetryNotificationRequest("corr-1", "notif-1");
        var outcome = new RetryNotificationResponse("corr-1", true, null);

        Assert.Equal("corr-1", retry.CorrelationId);
        Assert.Equal("notif-1", retry.NotificationId);
        Assert.True(outcome.Success);
        Assert.Null(outcome.ErrorMessage);
    }

    [Fact]
    public void DiscardNotificationRequestAndResponse_RoundTripFields()
    {
        var discard = new DiscardNotificationRequest("corr-1", "notif-1");
        var outcome = new DiscardNotificationResponse("corr-1", false, "not found");

        Assert.Equal("notif-1", discard.NotificationId);
        Assert.False(outcome.Success);
        Assert.Equal("not found", outcome.ErrorMessage);
    }

    [Fact]
    public void NotificationKpiResponse_WithExpression_ChangesSingleField()
    {
        var original = new NotificationKpiResponse(
            "corr-1", Success: true, ErrorMessage: null, 10, 2, 1, 5, TimeSpan.FromMinutes(3));

        var deeper = original with { QueueDepth = 12 };

        // The source instance keeps its values.
        Assert.True(original.Success);
        Assert.Null(original.ErrorMessage);
        Assert.Equal(10, original.QueueDepth);

        // The copy differs only in the overridden member.
        Assert.Equal(12, deeper.QueueDepth);
        Assert.Equal(2, deeper.StuckCount);
        Assert.Equal(TimeSpan.FromMinutes(3), deeper.OldestPendingAge);
        Assert.NotEqual(original, deeper);
    }

    [Fact]
    public void NotificationKpiRequest_PositionalConstruction_SetsCorrelationId()
    {
        var request = new NotificationKpiRequest("corr-1");

        Assert.Equal("corr-1", request.CorrelationId);
    }

    [Fact]
    public void PerSiteNotificationKpiRequest_CarriesCorrelationId()
    {
        var perSiteRequest = new PerSiteNotificationKpiRequest("corr-1");

        Assert.Equal("corr-1", perSiteRequest.CorrelationId);
    }

    [Fact]
    public void PerSiteNotificationKpiResponse_CarriesPerSiteSnapshots()
    {
        var plantA = new SiteNotificationKpiSnapshot("plant-a", 3, 1, 0, 10, TimeSpan.FromMinutes(4));
        var snapshots = new[] { plantA };

        var perSiteResponse = new PerSiteNotificationKpiResponse(
            "corr-1", Success: true, ErrorMessage: null, snapshots);

        Assert.True(perSiteResponse.Success);
        Assert.Null(perSiteResponse.ErrorMessage);
        Assert.Single(perSiteResponse.Sites);
        Assert.Equal("plant-a", perSiteResponse.Sites[0].SourceSiteId);
    }
}
|
||||
@@ -12,6 +12,8 @@ public class EnumTests
|
||||
[InlineData(typeof(AlarmLevel), new[] { "None", "Low", "LowLow", "High", "HighHigh" })]
|
||||
[InlineData(typeof(AlarmTriggerType), new[] { "ValueMatch", "RangeViolation", "RateOfChange", "HiLo", "Expression" })]
|
||||
[InlineData(typeof(ConnectionHealth), new[] { "Connected", "Disconnected", "Connecting", "Error" })]
|
||||
[InlineData(typeof(NotificationStatus), new[] { "Pending", "Retrying", "Delivered", "Parked", "Discarded" })]
|
||||
[InlineData(typeof(NotificationType), new[] { "Email" })]
|
||||
public void Enum_ShouldHaveExpectedValues(Type enumType, string[] expectedNames)
|
||||
{
|
||||
var actualNames = Enum.GetNames(enumType);
|
||||
@@ -26,6 +28,8 @@ public class EnumTests
|
||||
[InlineData(typeof(AlarmLevel))]
|
||||
[InlineData(typeof(AlarmTriggerType))]
|
||||
[InlineData(typeof(ConnectionHealth))]
|
||||
[InlineData(typeof(NotificationStatus))]
|
||||
[InlineData(typeof(NotificationType))]
|
||||
public void Enum_ShouldBeSingularNamed(Type enumType)
|
||||
{
|
||||
// Singular names should not end with 's' (except 'Status' which is singular)
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.Commons.Tests.Types;
|
||||
|
||||
/// <summary>
/// Checks that <see cref="SiteNotificationKpiSnapshot"/> exposes the values it was
/// constructed with, including a null <c>OldestPendingAge</c>.
/// </summary>
public class SiteNotificationKpiSnapshotTests
{
    [Fact]
    public void Constructor_AssignsAllMembers()
    {
        var oldestAge = TimeSpan.FromMinutes(12);

        var kpis = new SiteNotificationKpiSnapshot(
            SourceSiteId: "plant-a",
            QueueDepth: 5,
            StuckCount: 2,
            ParkedCount: 1,
            DeliveredLastInterval: 40,
            OldestPendingAge: oldestAge);

        Assert.Equal("plant-a", kpis.SourceSiteId);
        Assert.Equal(5, kpis.QueueDepth);
        Assert.Equal(2, kpis.StuckCount);
        Assert.Equal(1, kpis.ParkedCount);
        Assert.Equal(40, kpis.DeliveredLastInterval);
        Assert.Equal(oldestAge, kpis.OldestPendingAge);
    }

    [Fact]
    public void OldestPendingAge_IsNullableForSitesWithNoBacklog()
    {
        // All counters are zero and no age is supplied.
        var idleSite = new SiteNotificationKpiSnapshot(
            SourceSiteId: "plant-b",
            QueueDepth: 0,
            StuckCount: 0,
            ParkedCount: 0,
            DeliveredLastInterval: 0,
            OldestPendingAge: null);

        Assert.Null(idleSite.OldestPendingAge);
    }
}
|
||||
@@ -10,6 +10,7 @@ using ScadaLink.Commons.Messages.Communication;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Messages.DebugView;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Communication.Actors;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using Akka.TestKit;
|
||||
@@ -251,6 +252,75 @@ public class CentralCommunicationActorTests : TestKit
|
||||
Assert.Equal("dep1", ((DeployInstanceCommand)msg.Message).DeploymentId);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="NotificationSubmit"/> with fixed sample values, the given
/// notification id and the current UTC time as its last argument.
/// </summary>
private NotificationSubmit CreateSubmit(string id = "notif1")
{
    var enqueuedAt = DateTimeOffset.UtcNow;
    return new NotificationSubmit(
        id, "ops-list", "Subject", "Body", "site1", "inst1", "script.cs", enqueuedAt);
}
|
||||
|
||||
/// <summary>
/// With an outbox registered, a submit told from the site probe arrives at the outbox probe,
/// and the ack the outbox probe replies with arrives at the site probe.
/// </summary>
[Fact]
public void NotificationSubmit_ForwardedToOutboxProxy_AckRoutesBackToSite()
{
    const string notificationId = "notif1";
    var (centralActor, _, _) = CreateActorWithMockRepo();
    var outbox = CreateTestProbe();
    centralActor.Tell(new RegisterNotificationOutbox(outbox.Ref));

    // This probe plays the site's ClusterClient, i.e. the original sender of the submit.
    var site = CreateTestProbe();
    centralActor.Tell(CreateSubmit(notificationId), site.Ref);

    // The outbox sees the submit with the site as its sender, so replying from the outbox
    // probe delivers the ack to the site probe rather than to the central actor.
    outbox.ExpectMsg<NotificationSubmit>(m => m.NotificationId == notificationId);
    outbox.Reply(new NotificationSubmitAck(notificationId, Accepted: true, Error: null));
    site.ExpectMsg<NotificationSubmitAck>(a => a.NotificationId == notificationId && a.Accepted);
}
|
||||
|
||||
/// <summary>
/// With an outbox registered, a status query told from the site probe arrives at the outbox
/// probe, and the response the outbox probe replies with arrives at the site probe.
/// </summary>
[Fact]
public void NotificationStatusQuery_ForwardedToOutboxProxy_ResponseRoutesBackToSite()
{
    const string correlationId = "corr1";
    var (centralActor, _, _) = CreateActorWithMockRepo();
    var outbox = CreateTestProbe();
    centralActor.Tell(new RegisterNotificationOutbox(outbox.Ref));

    var site = CreateTestProbe();
    centralActor.Tell(new NotificationStatusQuery(correlationId, "notif1"), site.Ref);

    outbox.ExpectMsg<NotificationStatusQuery>(m => m.CorrelationId == correlationId);

    var delivered = new NotificationStatusResponse(
        correlationId, Found: true, Status: "Delivered", RetryCount: 0,
        LastError: null, DeliveredAt: DateTimeOffset.UtcNow);
    outbox.Reply(delivered);

    site.ExpectMsg<NotificationStatusResponse>(r => r.CorrelationId == correlationId && r.Found);
}
|
||||
|
||||
/// <summary>
/// Without a registered outbox, a submit is answered with a non-accepted ack that names the
/// notification and carries an error.
/// </summary>
[Fact]
public void NotificationSubmit_NoOutboxConfigured_RepliesNonAccepted()
{
    var (centralActor, _, _) = CreateActorWithMockRepo();

    // RegisterNotificationOutbox is deliberately never sent, so no outbox proxy is set.
    centralActor.Tell(CreateSubmit());

    var rejection = ExpectMsg<NotificationSubmitAck>();
    Assert.Equal("notif1", rejection.NotificationId);
    Assert.False(rejection.Accepted);
    Assert.NotNull(rejection.Error);
}
|
||||
|
||||
/// <summary>
/// Without a registered outbox, a status query is answered with a not-found response that
/// echoes the correlation id.
/// </summary>
[Fact]
public void NotificationStatusQuery_NoOutboxConfigured_RepliesNotFound()
{
    var (centralActor, _, _) = CreateActorWithMockRepo();

    // RegisterNotificationOutbox is deliberately never sent, so no outbox proxy is set.
    centralActor.Tell(new NotificationStatusQuery("corr1", "notif1"));

    var notFound = ExpectMsg<NotificationStatusResponse>();
    Assert.Equal("corr1", notFound.CorrelationId);
    Assert.False(notFound.Found);
}
|
||||
|
||||
[Fact]
|
||||
public void BothContactPoints_UsedInSingleClient()
|
||||
{
|
||||
|
||||
@@ -3,6 +3,8 @@ using Akka.TestKit.Xunit2;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Types.Notifications;
|
||||
|
||||
namespace ScadaLink.Communication.Tests;
|
||||
|
||||
@@ -72,6 +74,168 @@ public class CommunicationServiceTests : TestKit
|
||||
Assert.Equal("sha256:applied", response.AppliedRevisionHash);
|
||||
}
|
||||
|
||||
// ── Notification Outbox: central-side outbox actor calls ──
|
||||
|
||||
/// <summary>
/// Querying the outbox on a service that never had SetNotificationOutbox called throws
/// <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task QueryNotificationOutboxAsync_BeforeOutboxSet_Throws()
{
    var options = Options.Create(new CommunicationOptions());
    var sut = new CommunicationService(options, NullLogger<CommunicationService>.Instance);
    var query = new NotificationOutboxQueryRequest(
        "corr-1", null, null, null, null, false, null, null, null, 1, 50);

    await Assert.ThrowsAsync<InvalidOperationException>(
        () => sut.QueryNotificationOutboxAsync(query));
}
|
||||
|
||||
/// <summary>
/// Retrying on a service that never had SetNotificationOutbox called throws
/// <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task RetryNotificationAsync_BeforeOutboxSet_Throws()
{
    var options = Options.Create(new CommunicationOptions());
    var sut = new CommunicationService(options, NullLogger<CommunicationService>.Instance);
    var retry = new RetryNotificationRequest("corr-1", "n1");

    await Assert.ThrowsAsync<InvalidOperationException>(
        () => sut.RetryNotificationAsync(retry));
}
|
||||
|
||||
/// <summary>
/// Discarding on a service that never had SetNotificationOutbox called throws
/// <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task DiscardNotificationAsync_BeforeOutboxSet_Throws()
{
    var options = Options.Create(new CommunicationOptions());
    var sut = new CommunicationService(options, NullLogger<CommunicationService>.Instance);
    var discard = new DiscardNotificationRequest("corr-1", "n1");

    await Assert.ThrowsAsync<InvalidOperationException>(
        () => sut.DiscardNotificationAsync(discard));
}
|
||||
|
||||
/// <summary>
/// Requesting KPIs on a service that never had SetNotificationOutbox called throws
/// <see cref="InvalidOperationException"/>.
/// </summary>
[Fact]
public async Task GetNotificationKpisAsync_BeforeOutboxSet_Throws()
{
    var options = Options.Create(new CommunicationOptions());
    var sut = new CommunicationService(options, NullLogger<CommunicationService>.Instance);
    var kpiRequest = new NotificationKpiRequest("corr-1");

    await Assert.ThrowsAsync<InvalidOperationException>(
        () => sut.GetNotificationKpisAsync(kpiRequest));
}
|
||||
|
||||
/// <summary>
/// The outbox probe receives the very same query request instance (no envelope), and the
/// awaited result is the very same response instance the probe replied with.
/// </summary>
[Fact]
public async Task QueryNotificationOutboxAsync_AsksOutboxProxyDirectly()
{
    var sut = new CommunicationService(
        Options.Create(new CommunicationOptions()),
        NullLogger<CommunicationService>.Instance);
    var outbox = CreateTestProbe();
    sut.SetNotificationOutbox(outbox.Ref);
    var query = new NotificationOutboxQueryRequest(
        "corr-q", "Pending", null, null, null, true, "alarm", null, null, 2, 25);

    var pending = sut.QueryNotificationOutboxAsync(query);

    // The outbox actor is central-local: the probe must see the request itself,
    // not a SiteEnvelope wrapping it.
    Assert.Same(query, outbox.ExpectMsg<NotificationOutboxQueryRequest>());

    var answer = new NotificationOutboxQueryResponse(
        "corr-q", true, null, Array.Empty<NotificationSummary>(), 0);
    outbox.Reply(answer);

    Assert.Same(answer, await pending);
}
|
||||
|
||||
/// <summary>
/// The outbox probe receives the very same retry request instance, and the awaited result
/// is the very same response instance the probe replied with.
/// </summary>
[Fact]
public async Task RetryNotificationAsync_AsksOutboxProxyDirectly()
{
    var sut = new CommunicationService(
        Options.Create(new CommunicationOptions()),
        NullLogger<CommunicationService>.Instance);
    var outbox = CreateTestProbe();
    sut.SetNotificationOutbox(outbox.Ref);
    var retry = new RetryNotificationRequest("corr-r", "n-7");

    var pending = sut.RetryNotificationAsync(retry);

    Assert.Same(retry, outbox.ExpectMsg<RetryNotificationRequest>());

    var answer = new RetryNotificationResponse("corr-r", true, null);
    outbox.Reply(answer);

    Assert.Same(answer, await pending);
}
|
||||
|
||||
/// <summary>
/// The outbox probe receives the very same discard request instance, and a failure response
/// from the probe is returned to the caller unchanged.
/// </summary>
[Fact]
public async Task DiscardNotificationAsync_AsksOutboxProxyDirectly()
{
    var sut = new CommunicationService(
        Options.Create(new CommunicationOptions()),
        NullLogger<CommunicationService>.Instance);
    var outbox = CreateTestProbe();
    sut.SetNotificationOutbox(outbox.Ref);
    var discard = new DiscardNotificationRequest("corr-d", "n-9");

    var pending = sut.DiscardNotificationAsync(discard);

    Assert.Same(discard, outbox.ExpectMsg<DiscardNotificationRequest>());

    var answer = new DiscardNotificationResponse("corr-d", false, "already delivered");
    outbox.Reply(answer);

    var outcome = await pending;
    Assert.Same(answer, outcome);
    Assert.False(outcome.Success);
}
|
||||
|
||||
/// <summary>
/// The outbox probe receives the very same KPI request instance, and the awaited result is
/// the very same response instance the probe replied with.
/// </summary>
[Fact]
public async Task GetNotificationKpisAsync_AsksOutboxProxyDirectly()
{
    var sut = new CommunicationService(
        Options.Create(new CommunicationOptions()),
        NullLogger<CommunicationService>.Instance);
    var outbox = CreateTestProbe();
    sut.SetNotificationOutbox(outbox.Ref);
    var kpiRequest = new NotificationKpiRequest("corr-k");

    var pending = sut.GetNotificationKpisAsync(kpiRequest);

    Assert.Same(kpiRequest, outbox.ExpectMsg<NotificationKpiRequest>());

    var answer = new NotificationKpiResponse(
        "corr-k", true, null, 3, 1, 0, 12, TimeSpan.FromMinutes(5));
    outbox.Reply(answer);

    var kpis = await pending;
    Assert.Same(answer, kpis);
    Assert.Equal(3, kpis.QueueDepth);
}
|
||||
|
||||
/// <summary>
/// The outbox probe receives the very same per-site KPI request instance, and the awaited
/// result is the very same response instance, including its single site snapshot.
/// </summary>
[Fact]
public async Task GetPerSiteNotificationKpisAsync_AsksOutboxProxyDirectly()
{
    var sut = new CommunicationService(
        Options.Create(new CommunicationOptions()),
        NullLogger<CommunicationService>.Instance);
    var outbox = CreateTestProbe();
    sut.SetNotificationOutbox(outbox.Ref);
    var perSiteRequest = new PerSiteNotificationKpiRequest("corr-ps");

    var pending = sut.GetPerSiteNotificationKpisAsync(perSiteRequest);

    Assert.Same(perSiteRequest, outbox.ExpectMsg<PerSiteNotificationKpiRequest>());

    var snapshots = new[] { new SiteNotificationKpiSnapshot("plant-a", 2, 0, 0, 5, null) };
    var answer = new PerSiteNotificationKpiResponse("corr-ps", true, null, snapshots);
    outbox.Reply(answer);

    var perSite = await pending;
    Assert.Same(answer, perSite);
    Assert.True(perSite.Success);
    Assert.Single(perSite.Sites);
    Assert.Equal("plant-a", perSite.Sites[0].SourceSiteId);
}
|
||||
|
||||
/// <summary>
|
||||
/// Stand-in for CentralCommunicationActor: verifies the message is wrapped
|
||||
/// in a SiteEnvelope targeting the requested site and replies with a typed
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster.Tools.Client;
|
||||
using Akka.TestKit.Xunit2;
|
||||
using ScadaLink.Commons.Messages.Deployment;
|
||||
using ScadaLink.Commons.Messages.Lifecycle;
|
||||
using ScadaLink.Commons.Messages.Integration;
|
||||
using ScadaLink.Commons.Messages.Notification;
|
||||
using ScadaLink.Commons.Messages.RemoteQuery;
|
||||
using ScadaLink.Communication.Actors;
|
||||
|
||||
@@ -103,6 +105,101 @@ public class SiteCommunicationActorTests : TestKit
|
||||
handlerProbe.ExpectMsg<IntegrationCallRequest>(msg => msg.CorrelationId == "corr1");
|
||||
}
|
||||
|
||||
/// <summary>
/// With a central client registered, a submit is forwarded as a ClusterClient.Send addressed
/// to "/user/central-communication", and an ack replied by the client probe reaches the
/// test actor that told the submit.
/// </summary>
[Fact]
public void NotificationSubmit_WithCentralClient_ForwardedToCentralAndAckRoutedBack()
{
    const string notificationId = "notif-1";
    var deploymentManager = CreateTestProbe();
    var centralClient = CreateTestProbe();
    var site = Sys.ActorOf(Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref)));
    site.Tell(new RegisterCentralClient(centralClient.Ref));

    site.Tell(new NotificationSubmit(
        notificationId, "Operators", "Subj", "Body", "site1", "inst1", "alarmScript",
        DateTimeOffset.UtcNow));

    // The probe stands in for the ClusterClient. Fish rather than expect, so a periodic
    // HeartbeatMessage from the actor's timer cannot fail the test by arriving first.
    var envelope = centralClient.FishForMessage<ClusterClient.Send>(
        s => s.Message is NotificationSubmit);
    Assert.Equal("/user/central-communication", envelope.Path);
    var forwarded = Assert.IsType<NotificationSubmit>(envelope.Message);
    Assert.Equal(notificationId, forwarded.NotificationId);

    // Replying as the probe targets the sender of the Send, which must be the original
    // teller (the test actor) and not the SiteCommunicationActor.
    centralClient.Reply(new NotificationSubmitAck(notificationId, Accepted: true, Error: null));
    ExpectMsg<NotificationSubmitAck>(ack => ack.NotificationId == notificationId && ack.Accepted);
}
|
||||
|
||||
/// <summary>
/// With no central client registered, a submit is answered directly with a non-accepted
/// ack for the same notification id.
/// </summary>
[Fact]
public void NotificationSubmit_WithoutCentralClient_RepliesWithNonAccepted()
{
    const string notificationId = "notif-2";
    var deploymentManager = CreateTestProbe();
    var site = Sys.ActorOf(Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref)));

    // RegisterCentralClient is deliberately never sent, so the submit cannot be forwarded.
    site.Tell(new NotificationSubmit(
        notificationId, "Operators", "Subj", "Body", "site1", null, null,
        DateTimeOffset.UtcNow));

    ExpectMsg<NotificationSubmitAck>(ack => ack.NotificationId == notificationId && !ack.Accepted);
}
|
||||
|
||||
/// <summary>
/// With a central client registered, a status query is forwarded as a ClusterClient.Send
/// addressed to "/user/central-communication", and a response replied by the client probe
/// reaches the test actor that told the query.
/// </summary>
[Fact]
public void NotificationStatusQuery_WithCentralClient_ForwardedToCentralAndResponseRoutedBack()
{
    const string correlationId = "corr-99";
    var deploymentManager = CreateTestProbe();
    var centralClient = CreateTestProbe();
    var site = Sys.ActorOf(Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref)));
    site.Tell(new RegisterCentralClient(centralClient.Ref));

    site.Tell(new NotificationStatusQuery(correlationId, "notif-1"));

    var envelope = centralClient.FishForMessage<ClusterClient.Send>(
        s => s.Message is NotificationStatusQuery);
    Assert.Equal("/user/central-communication", envelope.Path);
    var forwarded = Assert.IsType<NotificationStatusQuery>(envelope.Message);
    Assert.Equal("notif-1", forwarded.NotificationId);

    // Replying as the probe targets the sender of the Send, which must be the original
    // teller (the test actor).
    var delivered = new NotificationStatusResponse(
        correlationId, Found: true, Status: "Delivered", RetryCount: 0,
        LastError: null, DeliveredAt: DateTimeOffset.UtcNow);
    centralClient.Reply(delivered);

    ExpectMsg<NotificationStatusResponse>(r => r.CorrelationId == correlationId && r.Found);
}
|
||||
|
||||
/// <summary>
/// With no central client registered, a status query is answered directly with a
/// not-found response echoing the correlation id.
/// </summary>
[Fact]
public void NotificationStatusQuery_WithoutCentralClient_RepliesWithNotFound()
{
    const string correlationId = "corr-100";
    var deploymentManager = CreateTestProbe();
    var site = Sys.ActorOf(Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref)));

    // RegisterCentralClient is deliberately never sent, so the query cannot reach central.
    site.Tell(new NotificationStatusQuery(correlationId, "notif-2"));

    ExpectMsg<NotificationStatusResponse>(
        r => r.CorrelationId == correlationId && !r.Found);
}
|
||||
|
||||
[Fact]
|
||||
public void EventLogQuery_WithoutHandler_ReturnsFailure()
|
||||
{
|
||||
|
||||
+91
@@ -0,0 +1,91 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Tests;
|
||||
|
||||
// Coverage for per-site KPI aggregation in the Notification Outbox repository
// (Task 2 of the notifications-nav-group feature).
public class NotificationOutboxRepositoryPerSiteKpiTests
{
    // Every test works against its own context obtained from the SQLite test helper.
    private static ScadaLinkDbContext NewContext() => SqliteTestHelper.CreateInMemoryContext();

    // Builds an e-mail notification for the given source site with a fresh GUID id
    // and fixed list/subject/body values, then overrides the status and timestamps
    // that the KPI tests vary. deliveredAt stays null unless supplied.
    private static Notification NewNotification(
        string sourceSiteId,
        NotificationStatus status,
        DateTimeOffset createdAt,
        DateTimeOffset? deliveredAt = null)
    {
        return new Notification(
            Guid.NewGuid().ToString(), NotificationType.Email, "Ops List", "Subject", "Body", sourceSiteId)
        {
            Status = status,
            CreatedAt = createdAt,
            DeliveredAt = deliveredAt,
        };
    }

    // Seeds three sites with different status/age mixes and checks the KPI row
    // returned for each one.
    [Fact]
    public async Task ComputePerSiteKpisAsync_AggregatesMetricsPerSite()
    {
        await using var ctx = NewContext();
        var now = DateTimeOffset.UtcNow;

        // plant-a: 1 pending (stuck, created 20m ago), 1 parked
        ctx.Notifications.Add(NewNotification("plant-a", NotificationStatus.Pending, createdAt: now.AddMinutes(-20)));
        ctx.Notifications.Add(NewNotification("plant-a", NotificationStatus.Parked, createdAt: now.AddMinutes(-5)));
        // plant-b: 1 delivered in-window, 1 pending (fresh)
        ctx.Notifications.Add(NewNotification("plant-b", NotificationStatus.Delivered, createdAt: now.AddHours(-2), deliveredAt: now.AddMinutes(-2)));
        ctx.Notifications.Add(NewNotification("plant-b", NotificationStatus.Pending, createdAt: now.AddMinutes(-1)));
        // plant-c: 2 non-terminal rows of clearly different ages — pending 90m ago,
        // retrying 40m ago. Both predate the 10m stuck cutoff. Exercises the
        // in-memory g.Min(CreatedAt) oldest-age reduction and the Retrying branch
        // of the QueueDepth/StuckCount predicates.
        ctx.Notifications.Add(NewNotification("plant-c", NotificationStatus.Pending, createdAt: now.AddMinutes(-90)));
        ctx.Notifications.Add(NewNotification("plant-c", NotificationStatus.Retrying, createdAt: now.AddMinutes(-40)));
        await ctx.SaveChangesAsync();

        var repo = new NotificationOutboxRepository(ctx);
        var result = await repo.ComputePerSiteKpisAsync(
            stuckCutoff: now.AddMinutes(-10), deliveredSince: now.AddMinutes(-30));

        var a = result.Single(s => s.SourceSiteId == "plant-a");
        Assert.Equal(1, a.QueueDepth);
        Assert.Equal(1, a.StuckCount);
        Assert.Equal(1, a.ParkedCount);
        Assert.Equal(0, a.DeliveredLastInterval);
        Assert.NotNull(a.OldestPendingAge);

        var b = result.Single(s => s.SourceSiteId == "plant-b");
        Assert.Equal(1, b.QueueDepth);
        Assert.Equal(0, b.StuckCount);
        Assert.Equal(1, b.DeliveredLastInterval);

        // plant-c: both the Pending and Retrying rows count toward QueueDepth;
        // both predate the stuck cutoff so both are stuck. OldestPendingAge must
        // reflect the older (90m) row, not the 40m Retrying one.
        var c = result.Single(s => s.SourceSiteId == "plant-c");
        Assert.Equal(2, c.QueueDepth);
        Assert.Equal(2, c.StuckCount);
        Assert.Equal(0, c.ParkedCount);
        Assert.NotNull(c.OldestPendingAge);
        // Tolerant bounds (90m ± 5m) to absorb clock skew between seed time and the
        // `now` captured inside ComputePerSiteKpisAsync, while still ruling out the
        // 40m Retrying row as the reported oldest age.
        Assert.True(c.OldestPendingAge >= TimeSpan.FromMinutes(85),
            $"expected OldestPendingAge >= 85m, got {c.OldestPendingAge}");
        Assert.True(c.OldestPendingAge < TimeSpan.FromMinutes(95),
            $"expected OldestPendingAge < 95m, got {c.OldestPendingAge}");
    }

    // With no notifications stored, the aggregation must yield no per-site rows.
    [Fact]
    public async Task ComputePerSiteKpisAsync_ReturnsEmpty_WhenNoNotifications()
    {
        await using var ctx = NewContext();
        // Capture the clock once so both cutoffs derive from the same instant,
        // mirroring the aggregation test in this class.
        var now = DateTimeOffset.UtcNow;

        var repo = new NotificationOutboxRepository(ctx);
        var result = await repo.ComputePerSiteKpisAsync(
            stuckCutoff: now, deliveredSince: now.AddMinutes(-30));

        Assert.Empty(result);
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user