Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| adf1bd2693 | |||
| bbff1d19b5 | |||
| 2a7ff03718 | |||
| 38e48299a4 | |||
| 43228185b4 | |||
| e3ca5ac0cf | |||
| 4c5e7eb917 | |||
| bdee12f4e9 | |||
| 3763f6d2d8 | |||
| 300841b205 |
@@ -15,7 +15,6 @@
|
||||
<PackageVersion Include="Akka.Streams" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams.TestKit" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.TestKit.Xunit2" Version="1.5.62" />
|
||||
<PackageVersion Include="AspNetCore.HealthChecks.UI.Client" Version="9.0.0" />
|
||||
<PackageVersion Include="bunit" Version="2.0.33-preview" />
|
||||
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
|
||||
<PackageVersion Include="FluentAssertions" Version="8.3.0" />
|
||||
@@ -73,6 +72,9 @@
|
||||
to mark tests as Skipped (not silently Passed) when MSSQL is unreachable.
|
||||
-->
|
||||
<PackageVersion Include="Xunit.SkippableFact" Version="1.5.61" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.Akka" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.EntityFrameworkCore" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Contracts" Version="0.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# SCADA System — Design Documentation
|
||||
# ScadaBridge
|
||||
|
||||
ScadaBridge is a centrally-managed, distributed SCADA configuration and deployment platform built on Akka.NET, running across a central cluster and multiple site clusters in a hub-and-spoke topology.
|
||||
|
||||
## Overview
|
||||
|
||||
This document serves as the master index for the SCADA system design. The system is a centrally-managed, distributed SCADA configuration and deployment platform built on Akka.NET, running across a central cluster and multiple site clusters in a hub-and-spoke topology.
|
||||
This repository is the full **implementation** project for ScadaBridge: the C#/.NET source (`src/`), the tests (`tests/`), the deployable Docker topologies and local test services (`docker/`, `docker-env2/`, `infra/`), and the design documentation (`docs/`) that the code implements. This README is the master index: it links the per-component **design specs** (the specifications implemented by the code in `src/`) and shows the system architecture. The solution file is `ZB.MOM.WW.ScadaBridge.slnx`.
|
||||
|
||||
### Technology Stack
|
||||
|
||||
@@ -24,6 +26,38 @@ This document serves as the master index for the SCADA system design. The system
|
||||
- Central cluster: 2-node active/standby behind a load balancer.
|
||||
- Site clusters: 2-node active/standby, headless (no UI).
|
||||
|
||||
## Repository Layout
|
||||
|
||||
| Path | Contents |
|
||||
|------|----------|
|
||||
| `src/` | C#/.NET implementation — one project per component (`ZB.MOM.WW.ScadaBridge.<Component>`). Solution: `ZB.MOM.WW.ScadaBridge.slnx`. |
|
||||
| `tests/` | Unit and integration test projects. |
|
||||
| `docs/` | Design documentation — `docs/requirements/` (high-level + per-component specs, the spec the code implements), `docs/test_infra/`, `docs/plans/`. |
|
||||
| `docker/` | Primary 8-node cluster topology (2 central + 3 sites × 2 nodes + Traefik) + `deploy.sh`. |
|
||||
| `docker-env2/` | Minimal second cluster (2 central + 1 site) for exercising Transport (#24) against a real second environment. |
|
||||
| `infra/` | Local test services (MS SQL, LDAP, OPC UA, SMTP, REST API, Traefik). |
|
||||
| `deploy/` | Production/on-host deployment artifacts (e.g. `wonder-app-vd03/`). |
|
||||
| `AkkaDotNet/` | Akka.NET reference notes. |
|
||||
|
||||
## Build, Test & Run
|
||||
|
||||
```bash
|
||||
# Build the solution
|
||||
dotnet build ZB.MOM.WW.ScadaBridge.slnx
|
||||
|
||||
# Run the tests
|
||||
dotnet test ZB.MOM.WW.ScadaBridge.slnx
|
||||
|
||||
# Bring up the primary local cluster (builds the scadabridge:latest image + recreates containers)
|
||||
bash docker/deploy.sh # central load balancer at http://localhost:9000
|
||||
|
||||
# Drive the system from the CLI (reads ~/.scadabridge/config.json; test user has all roles)
|
||||
dotnet run --project src/ZB.MOM.WW.ScadaBridge.CLI -- \
|
||||
--username multi-role --password password template list
|
||||
```
|
||||
|
||||
See [`docker/README.md`](docker/README.md) for ports and management commands, and [`src/ZB.MOM.WW.ScadaBridge.CLI/README.md`](src/ZB.MOM.WW.ScadaBridge.CLI/README.md) for the full CLI reference.
|
||||
|
||||
## Local Test Environments
|
||||
|
||||
Two Docker-based cluster topologies are available for local development and testing:
|
||||
@@ -76,102 +110,52 @@ Both stacks share the infrastructure services in [`infra/`](infra/) (MS SQL, LDA
|
||||
|
||||
### Architecture Diagram (Logical)
|
||||
|
||||
```
|
||||
Users (Blazor Server)
|
||||
│
|
||||
Load Balancer
|
||||
│
|
||||
┌────────────────────────┼────────────────────────────┐
|
||||
│ CENTRAL CLUSTER │
|
||||
│ (2-node active/standby) │
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Template │ │Deployment│ │ Central │ │
|
||||
│ │ Engine │ │ Manager │ │ UI │ Blazor Svr │
|
||||
│ └──────────┘ └──────────┘ └──────────┘ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Security │ │ Config │ │ Health │ │
|
||||
│ │ & Auth │ │ DB │ │ Monitor │ │
|
||||
│ │ (JWT/LDAP)│ │ (EF+IAud)│ │ │ │
|
||||
│ └──────────┘ └──────────┘ └──────────┘ │
|
||||
│ ┌──────────┐ │
|
||||
│ │ Inbound │ ◄── External Systems (X-API-Key) │
|
||||
│ │ API │ POST /api/{method}, JSON │
|
||||
│ └──────────┘ │
|
||||
│ ┌──────────┐ │
|
||||
│ │ Mgmt │ ◄── CLI (ClusterClient) │
|
||||
│ │ Service │ ManagementActor + Receptionist │
|
||||
│ └──────────┘ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Ntf │ │ Site │ │ Audit │ Observ. / │
|
||||
│ │ Outbox │ │ Call │ │ Log │ Audit area │
|
||||
│ │ (#21) │ │ Audit │ │ (#23) │ │
|
||||
│ │ │ │ (#22) │ │ │ │
|
||||
│ └────▲─────┘ └────▲─────┘ └────▲─────┘ │
|
||||
│ │ ingests │ ingests │ ingests │
|
||||
│ │ (S&F) │ (telemetry)│ (telemetry + │
|
||||
│ │ │ │ direct-write │
|
||||
│ │ │ │ from Ntf Outbox │
|
||||
│ │ │ │ & Inbound API) │
|
||||
│ ┌───────────────────────────────────┐ │
|
||||
│ │ Akka.NET Communication Layer │ │
|
||||
│ │ ClusterClient: command/control │ │
|
||||
│ │ gRPC Client: real-time streams │ │
|
||||
│ │ (correlation IDs, per-pattern │ │
|
||||
│ │ timeouts, message ordering) │ │
|
||||
│ └──────────────┬────────────────────┘ │
|
||||
│ ┌──────────────┴────────────────────┐ │
|
||||
│ │ Configuration Database (EF) │──► MS SQL │
|
||||
│ └───────────────────────────────────┘ (Config DB)│
|
||||
│ │ Machine Data DB│
|
||||
└─────────────────┼───────────────────────────────────┘
|
||||
│ Akka.NET Remoting (command/control)
|
||||
│ gRPC HTTP/2 (real-time data, port 8083)
|
||||
┌────────────┼────────────┐
|
||||
▼ ▼ ▼
|
||||
┌─────────┐ ┌─────────┐ ┌─────────┐
|
||||
│ SITE A │ │ SITE B │ │ SITE N │
|
||||
│ (2-node)│ │ (2-node)│ │ (2-node)│
|
||||
│ ┌─────┐ │ │ ┌─────┐ │ │ ┌─────┐ │
|
||||
│ │Data │ │ │ │Data │ │ │ │Data │ │
|
||||
│ │Conn │ │ │ │Conn │ │ │ │Conn │ │
|
||||
│ │Layer │ │ │ │Layer │ │ │ │Layer │ │
|
||||
│ ├─────┤ │ │ ├─────┤ │ │ ├─────┤ │
|
||||
│ │Site │ │ │ │Site │ │ │ │Site │ │
|
||||
│ │Runtm│ │ │ │Runtm│ │ │ │Runtm│ │
|
||||
│ ├─────┤ │ │ ├─────┤ │ │ ├─────┤ │
|
||||
│ │gRPC │ │ │ │gRPC │ │ │ │gRPC │ │
|
||||
│ │Srvr │ │ │ │Srvr │ │ │ │Srvr │ │
|
||||
│ ├─────┤ │ │ ├─────┤ │ │ ├─────┤ │
|
||||
│ │S&F │ │ │ │S&F │ │ │ │S&F │ │
|
||||
│ │Engine│ │ │ │Engine│ │ │ │Engine│ │
|
||||
│ ├─────┤ │ │ ├─────┤ │ │ ├─────┤ │
|
||||
│ │ExtSys│ │ │ │ExtSys│ │ │ │ExtSys│ │
|
||||
│ │Gatwy │ │ │ │Gatwy │ │ │ │Gatwy │ │
|
||||
│ └─────┘ │ │ └─────┘ │ │ └─────┘ │
|
||||
│ SQLite │ │ SQLite │ │ SQLite │
|
||||
└─────────┘ └─────────┘ └─────────┘
|
||||
│ │ │
|
||||
OPC UA / OPC UA / OPC UA /
|
||||
Custom Custom Custom
|
||||
Protocol Protocol Protocol
|
||||
```
|
||||

|
||||
<!-- source: diagrams/architecture-logical.drawio — edit, then re-export with export-drawio.sh -->
|
||||
|
||||
### Site Runtime Actor Hierarchy
|
||||
|
||||
```
|
||||
Deployment Manager Singleton (Cluster Singleton)
|
||||
├── Instance Actor (one per deployed, enabled instance)
|
||||
│ ├── Script Actor (coordinator, one per instance script)
|
||||
│ │ └── Script Execution Actor (short-lived, per invocation)
|
||||
│ ├── Alarm Actor (coordinator, one per alarm definition)
|
||||
│ │ └── Alarm Execution Actor (short-lived, per on-trigger invocation)
|
||||
│ └── ... (more Script/Alarm Actors)
|
||||
├── Instance Actor
|
||||
│ └── ...
|
||||
└── ... (more Instance Actors)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
DMS["Deployment Manager Singleton<br/>(Cluster Singleton)"]
|
||||
IA["Instance Actor<br/>(one per deployed, enabled instance)"]
|
||||
IA2["Instance Actor<br/>( … )"]
|
||||
MOREIA["… more Instance Actors"]
|
||||
DMS --> IA
|
||||
DMS --> IA2
|
||||
DMS -.-> MOREIA
|
||||
|
||||
Site-Wide Akka Stream (attribute + alarm state changes)
|
||||
├── All Instance Actors publish to the stream
|
||||
└── Debug view subscribes with instance-level filtering
|
||||
SA["Script Actor<br/>(coordinator, one per instance script)"]
|
||||
AA["Alarm Actor<br/>(coordinator, one per alarm definition)"]
|
||||
MORE1["… more Script /<br/>Alarm Actors"]
|
||||
IA --> SA
|
||||
IA --> AA
|
||||
IA -.-> MORE1
|
||||
|
||||
SEA["Script Execution Actor<br/>(short-lived, per invocation)"]
|
||||
AEA["Alarm Execution Actor<br/>(short-lived, per on-trigger invocation)"]
|
||||
IA2C["… (Script / Alarm Actors)"]
|
||||
SA --> SEA
|
||||
AA --> AEA
|
||||
IA2 -.-> IA2C
|
||||
|
||||
subgraph STREAM["Site-Wide Akka Stream"]
|
||||
PUB["All Instance Actors"]
|
||||
STR["Site-Wide Akka Stream<br/>(attribute + alarm state changes)"]
|
||||
DBG["Debug view<br/>(instance-level filtering)"]
|
||||
PUB -->|publish| STR
|
||||
STR -->|subscribe filtered| DBG
|
||||
end
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class DMS,STR alt
|
||||
class IA,IA2,PUB proc
|
||||
class SA,AA,DBG start
|
||||
class SEA,AEA warn
|
||||
class MOREIA,MORE1,IA2C muted
|
||||
```
|
||||
|
||||
@@ -0,0 +1,214 @@
|
||||
<mxfile host="app.diagrams.net">
|
||||
<diagram id="arch-logical" name="Logical Architecture">
|
||||
<mxGraphModel dx="1200" dy="900" grid="1" gridSize="10" guides="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1000" pageHeight="1200" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
|
||||
<!-- top: users + load balancer -->
|
||||
<mxCell id="users" value="Users (Blazor Server)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="430" y="20" width="180" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="lb" value="Load Balancer / Traefik" style="whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
|
||||
<mxGeometry x="430" y="92" width="180" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_users_lb" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;" edge="1" parent="1" source="users" target="lb">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_lb_central" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;" edge="1" parent="1" source="lb" target="central">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- central cluster container -->
|
||||
<mxCell id="central" value="CENTRAL CLUSTER — 2-node active / standby" style="rounded=0;whiteSpace=wrap;html=1;verticalAlign=top;fontStyle=1;fontSize=14;fillColor=#eef3fb;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="40" y="160" width="740" height="490" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="te" value="Template Engine" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="206" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="dm" value="Deployment Manager" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="300" y="206" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ui" value="Central UI (Blazor Server)" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="530" y="206" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="sec" value="Security &amp; Auth (JWT / LDAP)" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="270" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="cfg" value="Configuration DB (EF + IAudit)" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="300" y="270" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="health" value="Health Monitor" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="530" y="270" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="inapi" value="Inbound API" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="338" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="extsys" value="External Systems (X-API-Key)" style="shape=cloud;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
|
||||
<mxGeometry x="548" y="352" width="184" height="48" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_ext_in" value="POST /api/{method} · JSON" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;fontSize=10;" edge="1" parent="1" source="extsys" target="inapi">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="mgmt" value="Management Service" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="402" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="cli" value="CLI (ClusterClient)" style="shape=cloud;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="1">
|
||||
<mxGeometry x="548" y="414" width="184" height="48" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_cli_mgmt" value="ManagementActor + Receptionist" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;fontSize=10;" edge="1" parent="1" source="cli" target="mgmt">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="obslabel" value="Observability / Audit" style="text;html=1;align=left;verticalAlign=middle;fontStyle=2;fontSize=11;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="456" width="300" height="18" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ntf" value="Notification Outbox (#21)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="478" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="sca" value="Site Call Audit (#22)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="1">
|
||||
<mxGeometry x="300" y="478" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="audit" value="Audit Log (#23)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="1">
|
||||
<mxGeometry x="530" y="478" width="200" height="44" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<mxCell id="comm" value="Akka.NET Communication Layer — ClusterClient (command/control) · gRPC Client (real-time streams)" style="whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="1">
|
||||
<mxGeometry x="70" y="558" width="430" height="64" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mssql" value="MS SQL Config DB · Machine Data DB" style="shape=cylinder3;whiteSpace=wrap;html=1;fillColor=#f5f5f5;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="580" y="556" width="200" height="70" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_cfg_sql" value="EF Core" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;dashed=1;fontSize=10;exitX=1;exitY=0.7;entryX=0;entryY=0.4;" edge="1" parent="1" source="cfg" target="mssql">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<Array as="points">
|
||||
<mxPoint x="515" y="301" />
|
||||
<mxPoint x="515" y="560" />
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
|
||||
<!-- ingests edges -->
|
||||
<mxCell id="e_ing_ntf" value="ingests (S&amp;F)" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;dashed=1;fontSize=9;" edge="1" parent="1" source="comm" target="ntf">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_ing_sca" value="ingests (telemetry)" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;dashed=1;fontSize=9;" edge="1" parent="1" source="comm" target="sca">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_ing_audit" value="ingests (telemetry + direct-write)" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;dashed=1;fontSize=9;exitX=0.95;exitY=0;entryX=0.1;entryY=1;" edge="1" parent="1" source="comm" target="audit">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- transport annotation between central and sites -->
|
||||
<mxCell id="transport" value="Akka.NET Remoting (command/control) · gRPC HTTP/2 (real-time data, port 8083)" style="text;html=1;align=center;verticalAlign=middle;fontStyle=2;fontSize=11;" vertex="1" parent="1">
|
||||
<mxGeometry x="120" y="662" width="580" height="28" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- SITE A -->
|
||||
<mxCell id="siteA" value="SITE A — 2-node" style="rounded=0;whiteSpace=wrap;html=1;verticalAlign=top;fontStyle=1;fillColor=#eafaf0;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="40" y="720" width="230" height="364" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_dcl" value="Data Connection Layer" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="758" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_rt" value="Site Runtime" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="806" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_grpc" value="gRPC Server" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="854" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_snf" value="Store-and-Forward Engine" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="902" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_esg" value="External System Gateway" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="950" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="a_sql" value="SQLite" style="shape=cylinder3;whiteSpace=wrap;html=1;fillColor=#f5f5f5;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="85" y="1002" width="140" height="64" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- SITE B -->
|
||||
<mxCell id="siteB" value="SITE B — 2-node" style="rounded=0;whiteSpace=wrap;html=1;verticalAlign=top;fontStyle=1;fillColor=#eafaf0;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="295" y="720" width="230" height="364" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_dcl" value="Data Connection Layer" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="758" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_rt" value="Site Runtime" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="806" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_grpc" value="gRPC Server" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="854" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_snf" value="Store-and-Forward Engine" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="902" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_esg" value="External System Gateway" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="950" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_sql" value="SQLite" style="shape=cylinder3;whiteSpace=wrap;html=1;fillColor=#f5f5f5;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="1002" width="140" height="64" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- SITE N -->
|
||||
<mxCell id="siteN" value="SITE N — 2-node" style="rounded=0;whiteSpace=wrap;html=1;verticalAlign=top;fontStyle=1;fillColor=#eafaf0;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="550" y="720" width="230" height="364" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_dcl" value="Data Connection Layer" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="758" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_rt" value="Site Runtime" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="806" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_grpc" value="gRPC Server" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="854" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_snf" value="Store-and-Forward Engine" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="902" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_esg" value="External System Gateway" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="950" width="190" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_sql" value="SQLite" style="shape=cylinder3;whiteSpace=wrap;html=1;fillColor=#f5f5f5;strokeColor=#666666;" vertex="1" parent="1">
|
||||
<mxGeometry x="595" y="1002" width="140" height="64" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- central -> sites transport edges -->
|
||||
<mxCell id="e_c_a" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;strokeWidth=1.5;" edge="1" parent="1" source="comm" target="siteA">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_c_b" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;strokeWidth=1.5;" edge="1" parent="1" source="comm" target="siteB">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_c_n" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;strokeWidth=1.5;" edge="1" parent="1" source="comm" target="siteN">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- field protocol per site -->
|
||||
<mxCell id="a_proto" value="OPC UA / Custom Protocol" style="whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=11;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="1100" width="190" height="36" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="b_proto" value="OPC UA / Custom Protocol" style="whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=11;" vertex="1" parent="1">
|
||||
<mxGeometry x="315" y="1100" width="190" height="36" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="n_proto" value="OPC UA / Custom Protocol" style="whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=11;" vertex="1" parent="1">
|
||||
<mxGeometry x="570" y="1100" width="190" height="36" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_a_proto" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;" edge="1" parent="1" source="a_dcl" target="a_proto">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_b_proto" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;" edge="1" parent="1" source="b_dcl" target="b_proto">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e_n_proto" style="edgeStyle=orthogonalEdgeStyle;html=1;endArrow=block;" edge="1" parent="1" source="n_dcl" target="n_proto">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 429 KiB |
@@ -6,23 +6,50 @@ ScadaBridge uses a hub-and-spoke architecture:
|
||||
- **Central Cluster**: Two-node active/standby Akka.NET cluster for management, UI, and coordination.
|
||||
- **Site Clusters**: Two-node active/standby Akka.NET clusters at each remote site for data collection and local processing.
|
||||
|
||||
```
|
||||
┌──────────────────────────┐
|
||||
│ Central Cluster │
|
||||
│ ┌──────┐ ┌──────┐ │
|
||||
Users ──────────► │ │Node A│◄──►│Node B│ │
|
||||
(HTTPS/LB) │ │Active│ │Stby │ │
|
||||
│ └──┬───┘ └──┬───┘ │
|
||||
└─────┼───────────┼────────┘
|
||||
│ │
|
||||
┌───────────┼───────────┼───────────┐
|
||||
│ │ │ │
|
||||
┌─────▼─────┐ ┌──▼──────┐ ┌──▼──────┐ ┌──▼──────┐
|
||||
│ Site 01 │ │ Site 02 │ │ Site 03 │ │ Site N │
|
||||
│ ┌──┐ ┌──┐ │ │ ┌──┐┌──┐│ │ ┌──┐┌──┐│ │ ┌──┐┌──┐│
|
||||
│ │A │ │B │ │ │ │A ││B ││ │ │A ││B ││ │ │A ││B ││
|
||||
│ └──┘ └──┘ │ │ └──┘└──┘│ │ └──┘└──┘│ │ └──┘└──┘│
|
||||
└───────────┘ └─────────┘ └─────────┘ └─────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
USERS["Users<br/>(HTTPS / LB)"]
|
||||
|
||||
subgraph CENTRAL["Central Cluster"]
|
||||
NA["Node A<br/>Active"]
|
||||
NB["Node B<br/>Standby"]
|
||||
NA <--> NB
|
||||
end
|
||||
|
||||
USERS --> NA
|
||||
CENTRAL --> SITE01
|
||||
CENTRAL --> SITE02
|
||||
CENTRAL --> SITE03
|
||||
CENTRAL --> SITEN
|
||||
|
||||
subgraph SITE01["Site 01"]
|
||||
S01A["A<br/>Active"]
|
||||
S01B["B<br/>Standby"]
|
||||
end
|
||||
subgraph SITE02["Site 02"]
|
||||
S02A["A<br/>Active"]
|
||||
S02B["B<br/>Standby"]
|
||||
end
|
||||
subgraph SITE03["Site 03"]
|
||||
S03A["A<br/>Active"]
|
||||
S03B["B<br/>Standby"]
|
||||
end
|
||||
subgraph SITEN["Site N"]
|
||||
SNA["A<br/>Active"]
|
||||
SNB["B<br/>Standby"]
|
||||
end
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class USERS dec
|
||||
class CENTRAL proc
|
||||
class NA,S01A,S02A,S03A,SNA start
|
||||
class NB,S01B,S02B,S03B,SNB muted
|
||||
class SITE01,SITE02,SITE03,SITEN warn
|
||||
```
|
||||
|
||||
## Central Cluster Setup
|
||||
|
||||
@@ -39,27 +39,42 @@ Both endpoints use the same `Protocol`. EF Core migration renames `Configuration
|
||||
|
||||
The `DataConnectionActor` Reconnecting state is extended:
|
||||
|
||||
```
|
||||
Connected
|
||||
│ disconnect detected
|
||||
▼
|
||||
Push bad quality to all subscribers
|
||||
│
|
||||
▼
|
||||
Retry active endpoint (5s interval)
|
||||
│ failure
|
||||
▼
|
||||
_consecutiveFailures++
|
||||
│
|
||||
├─ < FailoverRetryCount → retry same endpoint
|
||||
│
|
||||
├─ ≥ FailoverRetryCount AND backup exists
|
||||
│ → dispose adapter, switch _activeEndpoint, reset counter
|
||||
│ → create fresh adapter with other config
|
||||
│ → attempt connect
|
||||
│
|
||||
└─ ≥ FailoverRetryCount AND no backup
|
||||
→ keep retrying indefinitely (current behavior)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
C(["Connected"])
|
||||
BQ["Push bad quality<br/>to all subscribers"]
|
||||
RT["Retry active endpoint<br/>(5s interval)"]
|
||||
INC["_consecutiveFailures++"]
|
||||
BR{"Evaluate<br/>_consecutiveFailures"}
|
||||
SAME["Retry same endpoint"]
|
||||
FO["Failover<br/>- dispose adapter, switch _activeEndpoint, reset counter<br/>- create fresh adapter with other config<br/>- attempt connect"]
|
||||
NB["Keep retrying indefinitely<br/>(current behavior)"]
|
||||
RC(["On successful reconnect (either endpoint)<br/>1. Reset _consecutiveFailures = 0<br/>2. ReSubscribeAll() — re-create subscriptions on new adapter<br/>3. Transition to Connected<br/>4. Log failover event if endpoint changed<br/>5. Report active endpoint in health metrics"])
|
||||
|
||||
C -->|"disconnect detected"| BQ
|
||||
BQ --> RT
|
||||
RT -->|"failure"| INC
|
||||
INC --> BR
|
||||
BR -->|"< FailoverRetryCount"| SAME
|
||||
SAME -.->|"retry"| RT
|
||||
BR -->|">= FailoverRetryCount AND backup exists"| FO
|
||||
BR -->|">= FailoverRetryCount AND no backup"| NB
|
||||
NB -.->|"retry same endpoint (no backup to rotate to)"| RT
|
||||
FO -->|"connect succeeds"| RC
|
||||
FO -.->|"connect fails (round-robin: primary to backup to primary...)"| RT
|
||||
RC -->|"Transition to Connected"| C
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
class C,RC start
|
||||
class BQ,RT,SAME proc
|
||||
class INC,BR dec
|
||||
class FO warn
|
||||
class NB bad
|
||||
```
|
||||
|
||||
**On successful reconnect (either endpoint):**
|
||||
|
||||
@@ -32,33 +32,45 @@ We want a strongly-typed model for OPC UA endpoint configuration, a validator th
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────┐
|
||||
│ ZB.MOM.WW.ScadaBridge.Commons │
|
||||
│ Types/DataConnections/ │
|
||||
│ OpcUaEndpointConfig.cs (POCO) │
|
||||
│ OpcUaHeartbeatConfig.cs (POCO) │
|
||||
│ OpcUaSecurityMode.cs (enum) │
|
||||
│ Validators/ │
|
||||
│ OpcUaEndpointConfigValidator.cs │
|
||||
│ Serialization/ │
|
||||
│ OpcUaEndpointConfigSerializer.cs │
|
||||
└──────────────────────────────────────┘
|
||||
▲
|
||||
│ (referenced by both)
|
||||
┌───────┴────────────────────────┐
|
||||
▼ ▼
|
||||
┌──────────────────────────┐ ┌────────────────────────────┐
|
||||
│ ZB.MOM.WW.ScadaBridge.CentralUI │ │ ZB.MOM.WW.ScadaBridge.SiteRuntime │
|
||||
│ Components/Forms/ │ │ Actors/ │
|
||||
│ OpcUaEndpointEditor │ │ DeploymentManagerActor │
|
||||
│ .razor (shared) │ │ (passes raw JSON to │
|
||||
│ │ │ DataConnectionFactory)│
|
||||
│ Pages/Admin/ │ │ │
|
||||
│ DataConnectionForm │ │ DataConnections.OpcUa/ │
|
||||
│ .razor │ │ OpcUaDataConnection.cs │
|
||||
└──────────────────────────┘ │ (consumes typed model) │
|
||||
└────────────────────────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
subgraph COMMONS["ZB.MOM.WW.ScadaBridge.Commons"]
|
||||
TYPES["Types/DataConnections/<br/>OpcUaEndpointConfig.cs (POCO)<br/>OpcUaHeartbeatConfig.cs (POCO)<br/>OpcUaSecurityMode.cs (enum)"]
|
||||
VALID["Validators/<br/>OpcUaEndpointConfigValidator.cs"]
|
||||
SER["Serialization/<br/>OpcUaEndpointConfigSerializer.cs"]
|
||||
TYPES ~~~ VALID ~~~ SER
|
||||
end
|
||||
|
||||
subgraph CENTRALUI["ZB.MOM.WW.ScadaBridge.CentralUI"]
|
||||
CUIFORMS["Components/Forms/<br/>OpcUaEndpointEditor.razor (shared)"]
|
||||
CUIPAGES["Pages/Admin/<br/>DataConnectionForm.razor"]
|
||||
CUIFORMS ~~~ CUIPAGES
|
||||
end
|
||||
|
||||
subgraph SITERUNTIME["ZB.MOM.WW.ScadaBridge.SiteRuntime"]
|
||||
SRACTORS["Actors/<br/>DeploymentManagerActor<br/>(passes raw JSON to DataConnectionFactory)"]
|
||||
SRDC["DataConnections.OpcUa/<br/>OpcUaDataConnection.cs<br/>(consumes typed model)"]
|
||||
SRACTORS ~~~ SRDC
|
||||
end
|
||||
|
||||
COMMONS -->|referenced by| CENTRALUI
|
||||
COMMONS -->|referenced by| SITERUNTIME
|
||||
|
||||
NOTE["Both sides deserialize DataConnection.PrimaryConfiguration / BackupConfiguration<br/>into the same OpcUaEndpointConfig type. The DB column type does not change."]
|
||||
CENTRALUI -.- NOTE
|
||||
SITERUNTIME -.- NOTE
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class COMMONS dec
|
||||
class TYPES,VALID,SER warn
|
||||
class CENTRALUI,CUIFORMS,CUIPAGES proc
|
||||
class SITERUNTIME,SRACTORS,SRDC start
|
||||
class NOTE muted
|
||||
```
|
||||
|
||||
Both sides deserialize the `DataConnection.PrimaryConfiguration` / `BackupConfiguration` strings into the same `OpcUaEndpointConfig` type. The DB column type does not change.
|
||||
|
||||
@@ -17,29 +17,8 @@ A sibling `docker-env2/` directory with `deploy.sh` / `teardown.sh` / `seed-site
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
(host machine)
|
||||
|
||||
Primary stack (already existing — unchanged) Env2 stack (new)
|
||||
┌────────────────────────────────────┐ ┌──────────────────────────────┐
|
||||
│ Traefik :9000 ◄── 9001/9002 UI │ │ Traefik :9100 ◄── 9101/9102 UI│
|
||||
│ Central A/B (9011/9012 Akka) │ │ Central A/B (9111/9112 Akka) │
|
||||
│ Site-A/B/C (9021..9044) │ │ Site-X (9121/9122 Akka, │
|
||||
└─────────────┬──────────────────────┘ │ 9123/9124 gRPC) │
|
||||
│ └──────────┬───────────────────┘
|
||||
│ │
|
||||
▼ scadabridge-net (shared bridge network) ◄──────┘
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ scadabridge-mssql ScadaBridgeConfig (primary DB) │
|
||||
│ ScadaBridgeMachineData (primary DB) │
|
||||
│ ScadaBridgeConfig2 (env2 DB) ← new │
|
||||
│ ScadaBridgeMachineData2(env2 DB) ← new │
|
||||
│ scadabridge-ldap (shared — same test users) │
|
||||
│ scadabridge-smtp (shared Mailpit) │
|
||||
│ scadabridge-opcua (shared) │
|
||||
│ scadabridge-restapi (shared) │
|
||||
└──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||

|
||||
<!-- source: diagrams/env2-architecture-overview.drawio — edit, then re-export with export-drawio.sh -->
|
||||
|
||||
Both stacks attach to the same `scadabridge-net` Docker bridge so env2's app containers can reach the infra services by container hostname (`scadabridge-mssql`, `scadabridge-ldap`, etc.). Akka clusters are independent — each side's `SeedNodes` lists only its own central nodes, so they never gossip-merge despite sharing the network.
|
||||
|
||||
|
||||
@@ -14,18 +14,43 @@
|
||||
|
||||
## Task Dependency Graph
|
||||
|
||||
```
|
||||
T0 ─┐ ┐
|
||||
T1 ─┤ (all independent, all │
|
||||
T2 ─┤ parallelizable, all ├─► T10 (manual smoke test)
|
||||
T3 ─┤ ready from the start) │
|
||||
T4 ─┤ │
|
||||
T6 ─┤ │
|
||||
T7 ─┤ │
|
||||
T8 ─┤ │
|
||||
T9 ─┘ │
|
||||
│
|
||||
T0,T4 ──► T5 (lifecycle scripts) ─────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart LR
|
||||
GRP["all independent, all parallelizable, all ready from the start"]
|
||||
T0["T0"]
|
||||
T1["T1"]
|
||||
T2["T2"]
|
||||
T3["T3"]
|
||||
T4["T4"]
|
||||
T6["T6"]
|
||||
T7["T7"]
|
||||
T8["T8"]
|
||||
T9["T9"]
|
||||
T5["T5<br/>lifecycle scripts"]
|
||||
T10(["T10<br/>manual smoke test"])
|
||||
NOTE["T10 is the only task that requires all of T0–T9 done. Everything else runs in parallel."]
|
||||
|
||||
T0 --> T10
|
||||
T1 --> T10
|
||||
T2 --> T10
|
||||
T3 --> T10
|
||||
T6 --> T10
|
||||
T7 --> T10
|
||||
T8 --> T10
|
||||
T9 --> T10
|
||||
T0 --> T5
|
||||
T4 --> T5
|
||||
T5 --> T10
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class T0,T1,T2,T3,T4,T6,T7,T8,T9 proc
|
||||
class T5 warn
|
||||
class T10 start
|
||||
class GRP,NOTE muted
|
||||
```
|
||||
|
||||
T10 is the only task that requires all of T0–T9 done. Everything else can run in parallel.
|
||||
|
||||
@@ -18,26 +18,32 @@
|
||||
|
||||
## Section 1 — Architecture
|
||||
|
||||
```
|
||||
[Blazor Server browser]
|
||||
│ SignalR
|
||||
▼
|
||||
[CentralUI: InstanceConfigure.razor]
|
||||
│ opens
|
||||
▼
|
||||
[CentralUI: <OpcUaBrowserDialog/>]
|
||||
│ uses
|
||||
▼
|
||||
[CentralUI: IOpcUaBrowseService] ── implementation calls
|
||||
│
|
||||
▼
|
||||
[CommunicationService.SendCommandToSiteAsync<BrowseOpcUaNodeResult>(siteId, BrowseOpcUaNodeCommand)]
|
||||
│ ClusterClient Ask, ManagementEnvelope { User, Command, CorrelationId }
|
||||
▼
|
||||
[Site: CentralCommunicationActor → DataConnectionManagerActor]
|
||||
│ dispatches to IBrowsableDataConnection (RealOpcUaClient)
|
||||
▼
|
||||
[OPC UA server] ◄── OPC Foundation .NET SDK Browse service
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
N1["Blazor Server browser"]
|
||||
N2["CentralUI: InstanceConfigure.razor"]
|
||||
N3["CentralUI: OpcUaBrowserDialog component"]
|
||||
N4["CentralUI: IOpcUaBrowseService"]
|
||||
N5["CommunicationService.SendCommandToSiteAsync of BrowseOpcUaNodeResult (siteId, BrowseOpcUaNodeCommand)"]
|
||||
N6["Site: CentralCommunicationActor → DataConnectionManagerActor"]
|
||||
N7["OPC UA server"]
|
||||
|
||||
N1 -->|SignalR| N2
|
||||
N2 -->|opens| N3
|
||||
N3 -->|uses| N4
|
||||
N4 -->|implementation calls| N5
|
||||
N5 -->|"ClusterClient Ask<br/>ManagementEnvelope { User, Command, CorrelationId }"| N6
|
||||
N6 -->|"dispatches to IBrowsableDataConnection (RealOpcUaClient)<br/>OPC Foundation .NET SDK Browse service"| N7
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
class N1,N2,N3,N4 proc
|
||||
class N5 alt
|
||||
class N6 start
|
||||
class N7 warn
|
||||
```
|
||||
|
||||
Three slices, top-to-bottom:
|
||||
@@ -164,24 +170,47 @@ Returning failure inside `BrowseOpcUaNodeResult` (rather than exceptions across
|
||||
|
||||
**Wire flow.**
|
||||
|
||||
```
|
||||
CentralUI.OpcUaBrowseService.BrowseChildrenAsync(siteId, connId, parent)
|
||||
→ CommunicationService.SendCommandToSiteAsync<BrowseOpcUaNodeResult>(
|
||||
siteId,
|
||||
new BrowseOpcUaNodeCommand(connId, parent))
|
||||
→ ManagementEnvelope { User, Command, CorrelationId } over ClusterClient
|
||||
→ Site: CentralCommunicationActor unwraps envelope
|
||||
→ Site: DataConnectionManagerActor receives BrowseOpcUaNodeCommand
|
||||
- Look up IDataConnection by Id
|
||||
- if not found → ConnectionNotFound
|
||||
- if !(conn is IBrowsableDataConnection) → NotBrowsable
|
||||
- else await conn.BrowseChildrenAsync(ParentNodeId, ct)
|
||||
- Catch ConnectionNotConnectedException → ConnectionNotConnected
|
||||
- Catch OperationCanceledException → Timeout
|
||||
- Catch ServiceResultException → ServerError + verbatim msg
|
||||
- Else success: BrowseOpcUaNodeResult(children, truncated, null)
|
||||
→ Reply travels back via CentralCommunicationActor → CommunicationService
|
||||
→ returned to CentralUI page
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
S1["CentralUI.OpcUaBrowseService.BrowseChildrenAsync(siteId, connId, parent)"]
|
||||
S2["CommunicationService.SendCommandToSiteAsync of BrowseOpcUaNodeResult (siteId, new BrowseOpcUaNodeCommand(connId, parent))"]
|
||||
S3["ManagementEnvelope { User, Command, CorrelationId }<br/>over ClusterClient"]
|
||||
S4["Site: CentralCommunicationActor unwraps envelope"]
|
||||
S5["Site: DataConnectionManagerActor receives BrowseOpcUaNodeCommand<br/>(DCL coordinator actor — owns the per-connection IDataConnection instances)"]
|
||||
|
||||
S1 --> S2 --> S3 --> S4 --> S5
|
||||
|
||||
subgraph HANDLER["Handler logic"]
|
||||
direction TB
|
||||
HL["Look up IDataConnection by Id"]
|
||||
HNF["if not found → ConnectionNotFound"]
|
||||
HNB["if not (conn is IBrowsableDataConnection) → NotBrowsable"]
|
||||
HAW["else await conn.BrowseChildrenAsync(ParentNodeId, ct)"]
|
||||
HNC["Catch ConnectionNotConnectedException → ConnectionNotConnected"]
|
||||
HCN["Catch OperationCanceledException → Timeout"]
|
||||
HSVC["Catch ServiceResultException → ServerError + verbatim msg"]
|
||||
HSUC["Else success: BrowseOpcUaNodeResult(children, truncated, null)"]
|
||||
HL --- HNF --- HNB --- HAW --- HNC --- HCN --- HSVC --- HSUC
|
||||
end
|
||||
|
||||
S5 -->|processes| HANDLER
|
||||
|
||||
R1["Reply travels back via<br/>CentralCommunicationActor → CommunicationService"]
|
||||
R2["returned to CentralUI page"]
|
||||
HANDLER -->|result / failure| R1
|
||||
R1 --> R2
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
class S1,R1,R2 proc
|
||||
class S2,S3 alt
|
||||
class S4,S5,HSUC start
|
||||
class HANDLER,HL,HAW dec
|
||||
class HNF,HNB,HNC,HCN,HSVC bad
|
||||
```
|
||||
|
||||
Handler lives in the **DCL coordinator actor** (the same actor that owns the per-connection `IDataConnection` instances) — keeps lifecycle and browse co-located so we don't race against reconnect.
|
||||
|
||||
@@ -16,22 +16,86 @@
|
||||
|
||||
## Task dependency overview
|
||||
|
||||
```
|
||||
T1 ─┬─ T2 ─┬─ T17 (computed AlarmActor enrich)
|
||||
│ ├─ T18 (proto) ── T19 (grpc mapping) ── T23 (DebugView)
|
||||
T3 ─┼─ T10 (DCL actor)
|
||||
├─ T11 (OPC UA adapter)
|
||||
└─ T12 (MxGateway adapter)
|
||||
T4 ─┬─ T5 ── T6 ── T21 (mgmt handlers)
|
||||
├─ T7 (migration)
|
||||
├─ T8 ── T9 (validation)
|
||||
└─ T20 ─┬─ T21 ── T26 (seed)
|
||||
├─ T22 (CLI)
|
||||
├─ T24 (template UI)
|
||||
└─ T25 (instance UI)
|
||||
T13, T14 ──┐
|
||||
T1,T2,T3,T4(Resolved),T13,T14 ── T15 (NativeAlarmActor) ── T16 (InstanceActor wiring)
|
||||
(everything) ── T27 (docs) , T28 (integration/manual verify)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart LR
|
||||
T1["T1"]
|
||||
T3["T3"]
|
||||
T2["T2"]
|
||||
T10["T10<br/>DCL actor"]
|
||||
T11["T11<br/>OPC UA adapter"]
|
||||
T12["T12<br/>MxGateway adapter"]
|
||||
T17["T17<br/>computed AlarmActor enrich"]
|
||||
T18["T18<br/>proto"]
|
||||
T19["T19<br/>grpc mapping"]
|
||||
T23["T23<br/>DebugView"]
|
||||
|
||||
T4["T4"]
|
||||
T5["T5"]
|
||||
T6["T6"]
|
||||
T7["T7<br/>migration"]
|
||||
T8["T8"]
|
||||
T9["T9<br/>validation"]
|
||||
T20["T20"]
|
||||
T21["T21<br/>mgmt handlers"]
|
||||
T26["T26<br/>seed"]
|
||||
T22["T22<br/>CLI"]
|
||||
T24["T24<br/>template UI"]
|
||||
T25["T25<br/>instance UI"]
|
||||
|
||||
T13["T13"]
|
||||
T14["T14"]
|
||||
T15["T15<br/>NativeAlarmActor"]
|
||||
T16["T16<br/>InstanceActor wiring"]
|
||||
|
||||
T15IN["inputs to T15:<br/>T1, T2, T3, T4 (Resolved), T13, T14"]
|
||||
T27["T27<br/>docs"]
|
||||
T28["T28<br/>integration / manual verify"]
|
||||
EVT["(everything) is a prerequisite of T27 and T28"]
|
||||
|
||||
T1 --> T2
|
||||
T1 --> T10
|
||||
T1 --> T11
|
||||
T1 --> T12
|
||||
T3 --> T2
|
||||
T3 --> T10
|
||||
T3 --> T11
|
||||
T3 --> T12
|
||||
|
||||
T2 --> T17
|
||||
T2 --> T18
|
||||
T18 --> T19
|
||||
T19 --> T23
|
||||
|
||||
T4 --> T5
|
||||
T4 --> T7
|
||||
T4 --> T8
|
||||
T4 --> T20
|
||||
T5 --> T6
|
||||
T6 --> T21
|
||||
T8 --> T9
|
||||
T20 --> T21
|
||||
T20 --> T22
|
||||
T20 --> T24
|
||||
T20 --> T25
|
||||
T21 --> T26
|
||||
|
||||
T13 --> T15
|
||||
T14 --> T15
|
||||
T15 --> T16
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class T1,T2,T3,T10,T11,T12 proc
|
||||
class T17,T18,T19,T23 alt
|
||||
class T4,T5,T6,T7,T8,T9,T20,T21,T22,T24,T25,T26 start
|
||||
class T13,T14 dec
|
||||
class T15,T16 warn
|
||||
class T27,T28,T15IN,EVT muted
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
{"id": 2, "subject": "Task 2: Run scrub + completeness gate", "status": "completed", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: Build, run unit tests, fix stragglers", "status": "completed", "blockedBy": [2]},
|
||||
{"id": 4, "subject": "Task 4: Commit the scrub", "status": "completed", "blockedBy": [3]},
|
||||
{"id": 5, "subject": "Task 5: Update local Git remote after Gitea web-UI rename", "status": "pending", "blockedBy": [4]},
|
||||
{"id": 6, "subject": "Task 6: Runtime cutover redeploy (conditional)", "status": "pending", "blockedBy": [4]}
|
||||
{"id": 5, "subject": "Task 5: Update local Git remote after Gitea web-UI rename", "status": "completed", "blockedBy": [4]},
|
||||
{"id": 6, "subject": "Task 6: Runtime cutover redeploy (conditional)", "status": "completed", "blockedBy": [4]}
|
||||
],
|
||||
"lastUpdated": "2026-06-01T01:59:34Z"
|
||||
"lastUpdated": "2026-06-01T02:05:03Z"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
<mxfile host="app.diagrams.net">
|
||||
<diagram id="env2arch" name="Env2 Architecture">
|
||||
<mxGraphModel dx="1400" dy="900" grid="1" gridSize="10" guides="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1100" pageHeight="900" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
|
||||
<!-- Host machine container -->
|
||||
<mxCell id="host" value="host machine" style="rounded=0;whiteSpace=wrap;html=1;fillColor=none;strokeColor=#666666;verticalAlign=top;fontStyle=2;fontColor=#666666;dashed=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="40" y="40" width="1020" height="800" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Primary stack -->
|
||||
<mxCell id="primary" value="Primary stack (already existing — unchanged)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;verticalAlign=top;fontStyle=1;align=center;spacingTop=6;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="100" width="420" height="220" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="p-traefik" value="Traefik :9000 ◄── 9001/9002 UI" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#6c8ebf;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="100" y="160" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="p-central" value="Central A/B (9011/9012 Akka)" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#6c8ebf;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="100" y="210" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="p-site" value="Site-A/B/C (9021..9044)" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#6c8ebf;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="100" y="260" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Env2 stack -->
|
||||
<mxCell id="env2" value="Env2 stack (new)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;verticalAlign=top;fontStyle=1;align=center;spacingTop=6;" vertex="1" parent="1">
|
||||
<mxGeometry x="600" y="100" width="420" height="220" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e-traefik" value="Traefik :9100 ◄── 9101/9102 UI" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#82b366;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="620" y="160" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e-central" value="Central A/B (9111/9112 Akka)" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#82b366;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="620" y="210" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="e-site" value="Site-X (9121/9122 Akka, 9123/9124 gRPC)" style="whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#82b366;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="620" y="260" width="380" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Shared network bar -->
|
||||
<mxCell id="net" value="scadabridge-net (shared bridge network)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontStyle=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="400" width="940" height="50" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Infra container -->
|
||||
<mxCell id="infra" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f5f5f5;strokeColor=#666666;verticalAlign=top;" vertex="1" parent="1">
|
||||
<mxGeometry x="120" y="500" width="860" height="300" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- MSSQL block -->
|
||||
<mxCell id="mssql" value="scadabridge-mssql" style="shape=cylinder3;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;verticalAlign=top;fontStyle=1;spacingTop=4;" vertex="1" parent="1">
|
||||
<mxGeometry x="150" y="530" width="160" height="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="db1" value="ScadaBridgeConfig (primary DB)" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="540" width="300" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="db2" value="ScadaBridgeMachineData (primary DB)" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="590" width="300" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="db3" value="ScadaBridgeConfig2 (env2 DB) ← new" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="640" width="300" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="db4" value="ScadaBridgeMachineData2 (env2 DB) ← new" style="whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="690" width="300" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Shared commodity infra services -->
|
||||
<mxCell id="ldap" value="scadabridge-ldap (shared — same test users)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="680" y="530" width="280" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="smtp" value="scadabridge-smtp (shared Mailpit)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="680" y="580" width="280" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="opcua" value="scadabridge-opcua (shared)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="680" y="630" width="280" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="restapi" value="scadabridge-restapi (shared)" style="whiteSpace=wrap;html=1;fillColor=#ffe6cc;strokeColor=#d79b00;align=left;spacingLeft=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="680" y="680" width="280" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
|
||||
<!-- Edges: primary -> net, env2 -> net -->
|
||||
<mxCell id="ep" style="edgeStyle=orthogonalEdgeStyle;rounded=0;html=1;endArrow=block;strokeColor=#6c8ebf;" edge="1" parent="1" source="primary" target="net">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ee" style="edgeStyle=orthogonalEdgeStyle;rounded=0;html=1;endArrow=block;strokeColor=#82b366;" edge="1" parent="1" source="env2" target="net">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<!-- net -> infra -->
|
||||
<mxCell id="eni" style="edgeStyle=orthogonalEdgeStyle;rounded=0;html=1;endArrow=block;strokeColor=#d6b656;" edge="1" parent="1" source="net" target="infra">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 317 KiB |
@@ -547,26 +547,26 @@ This section governs how implementation plans are executed. The goal is autonomo
|
||||
|
||||
For each work package, follow this sequence:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ 1. READ the WP description and acceptance criteria │
|
||||
│ 2. READ all traced requirements (HLR bullets, KDD, │
|
||||
│ CD constraints) to understand intent │
|
||||
│ 3. IMPLEMENT the WP │
|
||||
│ - Write code │
|
||||
│ - Write unit tests for acceptance criteria │
|
||||
│ - Write negative tests for prohibition criteria │
|
||||
│ 4. VERIFY acceptance criteria │
|
||||
│ - Run tests: all must pass │
|
||||
│ - Walk each acceptance criterion line by line │
|
||||
│ - If a criterion cannot be verified yet (depends │
|
||||
│ on a later WP), note it as "deferred to WP-N" │
|
||||
│ 5. UPDATE the phase execution checklist │
|
||||
│ - Mark WP as complete with date │
|
||||
│ - Note any deferred criteria │
|
||||
│ - Note any questions logged │
|
||||
│ 6. COMMIT with message: "Phase N WP-M: <summary>" │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
S1["1. READ the WP description and acceptance criteria"]
|
||||
S2["2. READ all traced requirements (HLR bullets, KDD, CD constraints) to understand intent"]
|
||||
S3["3. IMPLEMENT the WP<br/>• Write code<br/>• Write unit tests for acceptance criteria<br/>• Write negative tests for prohibition criteria"]
|
||||
S4["4. VERIFY acceptance criteria<br/>• Run tests: all must pass<br/>• Walk each acceptance criterion line by line<br/>• If a criterion cannot be verified yet (depends on a later WP), note it as deferred to WP-N"]
|
||||
S5["5. UPDATE the phase execution checklist<br/>• Mark WP as complete with date<br/>• Note any deferred criteria<br/>• Note any questions logged"]
|
||||
S6["6. COMMIT with message:<br/>Phase N WP-M: summary"]
|
||||
|
||||
S1 --> S2 --> S3 --> S4 --> S5 --> S6
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
class S1,S2 proc
|
||||
class S3 start
|
||||
class S4,S5 dec
|
||||
class S6 warn
|
||||
```
|
||||
|
||||
### Mid-Phase Compliance Check
|
||||
|
||||
+73
-56
@@ -23,28 +23,38 @@ gRPC server-streaming is an established pattern for real-time tag value updates;
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Central Cluster Site Cluster
|
||||
───────────── ────────────
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
subgraph CENTRAL["Central Cluster"]
|
||||
BT["DebugStreamBridgeActor"]
|
||||
GC["SiteStreamGrpcClient<br/>(per-site, on central)"]
|
||||
BB["DebugStreamBridgeActor"]
|
||||
SR(["SignalR Hub / Blazor UI"])
|
||||
end
|
||||
|
||||
DebugStreamBridgeActor InstanceActor
|
||||
│ │
|
||||
│── SubscribeDebugView ──► │ (ClusterClient: command/control)
|
||||
│◄── DebugViewSnapshot ── │
|
||||
│ │
|
||||
│ │ publishes AttributeValueChanged
|
||||
│ │ publishes AlarmStateChanged
|
||||
│ ▼
|
||||
SiteStreamGrpcClient ◄──── gRPC stream ───── SiteStreamGrpcServer
|
||||
(per-site, on central) (HTTP/2) (Kestrel, on site)
|
||||
│ │
|
||||
│ reads from gRPC stream │ receives from SiteStreamManager
|
||||
│ routes by correlationId │ filters by instance name
|
||||
▼ │
|
||||
DebugStreamBridgeActor │
|
||||
│ │
|
||||
▼ │
|
||||
SignalR Hub / Blazor UI │
|
||||
subgraph SITE["Site Cluster"]
|
||||
IN["InstanceActor"]
|
||||
PB{"publishes<br/>AttributeValueChanged<br/>AlarmStateChanged"}
|
||||
GS["SiteStreamGrpcServer<br/>(Kestrel, on site)"]
|
||||
end
|
||||
|
||||
BT -.->|"SubscribeDebugView"| IN
|
||||
IN -.->|"DebugViewSnapshot"| BT
|
||||
IN --> PB
|
||||
PB --> GS
|
||||
GS -->|"gRPC stream (HTTP/2)"| GC
|
||||
GC --> BB
|
||||
BB --> SR
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
class BT,GC,BB proc
|
||||
class SR start
|
||||
class IN,GS warn
|
||||
class PB dec
|
||||
```
|
||||
|
||||
**Key separation**: ClusterClient handles subscribe/unsubscribe/snapshot (request-response). gRPC handles the ongoing value stream (server-streaming).
|
||||
@@ -271,16 +281,22 @@ public override async Task SubscribeInstance(
|
||||
|
||||
`IServerStreamWriter<T>` is **not thread-safe**. Multiple Akka actors may publish events concurrently. The `Channel<SiteStreamEvent>` bridges these worlds:
|
||||
|
||||
```
|
||||
Akka Actor Thread(s) gRPC Response Stream
|
||||
│ ▲
|
||||
│ channel.Writer.TryWrite(evt) │ await responseStream.WriteAsync(evt)
|
||||
▼ │
|
||||
┌─────────────────────────────────────────┐
|
||||
│ Channel<SiteStreamEvent> │
|
||||
│ BoundedChannelOptions(1000) │
|
||||
│ FullMode = DropOldest │
|
||||
└─────────────────────────────────────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
AKKA["Akka Actor Thread(s)"]
|
||||
CH(["Channel#lt;SiteStreamEvent#gt;<br/><br/>BoundedChannelOptions(1000)<br/>FullMode = DropOldest"])
|
||||
GRPC["gRPC Response Stream"]
|
||||
|
||||
AKKA -->|"channel.Writer.TryWrite(evt)"| CH
|
||||
CH -->|"await responseStream.WriteAsync(evt)"| GRPC
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
class AKKA warn
|
||||
class CH start
|
||||
class GRPC proc
|
||||
```
|
||||
|
||||
- **Bounded capacity** (1000): prevents unbounded memory growth if the gRPC client is slow
|
||||
@@ -431,31 +447,32 @@ private void HandleGrpcStreamError(Exception ex)
|
||||
|
||||
### Reconnection State Machine (DebugStreamBridgeActor)
|
||||
|
||||
```
|
||||
┌──────────────────┐
|
||||
│ Streaming │ ◄── Normal state: gRPC stream active
|
||||
└────────┬─────────┘
|
||||
│ gRPC stream error / keepalive timeout
|
||||
▼
|
||||
┌──────────────────┐
|
||||
┌──► │ Reconnecting │ ── try other node endpoint
|
||||
│ └────────┬─────────┘
|
||||
│ │
|
||||
│ ┌────────┴─────────┐
|
||||
│ │ │
|
||||
│ success failure (retry < max)
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ Streaming schedule retry (5s backoff)
|
||||
│ │
|
||||
└───────────────────────┘
|
||||
│
|
||||
failure (retry >= max)
|
||||
│
|
||||
▼
|
||||
┌──────────────────┐
|
||||
│ Terminated │ ── notify consumer, stop actor
|
||||
└──────────────────┘
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
S(["Streaming<br/><i>Normal state: gRPC stream active</i>"])
|
||||
R(["Reconnecting<br/><i>try other node endpoint</i>"])
|
||||
D{"reconnect result?"}
|
||||
RT["schedule retry<br/>(5s backoff)"]
|
||||
T(["Terminated<br/><i>notify consumer, stop actor</i>"])
|
||||
|
||||
S -->|"gRPC stream error / keepalive timeout"| R
|
||||
R --> D
|
||||
D -->|"success"| S
|
||||
D -->|"failure (retry < max)"| RT
|
||||
RT --> R
|
||||
D -->|"failure (retry >= max)"| T
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
class S start
|
||||
class R dec
|
||||
class D proc
|
||||
class RT warn
|
||||
class T bad
|
||||
```
|
||||
|
||||
### Summary
|
||||
|
||||
@@ -167,19 +167,54 @@ Keepalive settings are configurable via `CommunicationOptions`:
|
||||
|
||||
## Topology
|
||||
|
||||
```
|
||||
Central Cluster
|
||||
├── ClusterClient → Site A Cluster (SiteCommunicationActor via Receptionist) [command/control]
|
||||
├── ClusterClient → Site B Cluster (SiteCommunicationActor via Receptionist) [command/control]
|
||||
└── ClusterClient → Site N Cluster (SiteCommunicationActor via Receptionist) [command/control]
|
||||
│
|
||||
├── SiteStreamGrpcClient ◄── gRPC stream ── Site A (SiteStreamGrpcServer) [real-time data]
|
||||
├── SiteStreamGrpcClient ◄── gRPC stream ── Site B (SiteStreamGrpcServer) [real-time data]
|
||||
└── SiteStreamGrpcClient ◄── gRPC stream ── Site N (SiteStreamGrpcServer) [real-time data]
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart LR
|
||||
subgraph Central["Central Cluster"]
|
||||
CCA["ClusterClient<br/>(command/control)"]
|
||||
CCB["ClusterClient<br/>(command/control)"]
|
||||
CCN["ClusterClient<br/>(command/control)"]
|
||||
GRPCC["SiteStreamGrpcClient<br/>(real-time data)"]
|
||||
end
|
||||
|
||||
Site Clusters
|
||||
└── ClusterClient → Central Cluster (CentralCommunicationActor via Receptionist) [command/control]
|
||||
└── SiteStreamGrpcServer (Kestrel HTTP/2, port 8083) → serves gRPC streams [real-time data]
|
||||
subgraph SiteA["Site A Cluster"]
|
||||
SACOMM["SiteCommunicationActor<br/>(via Receptionist)"]
|
||||
SAGRPC["SiteStreamGrpcServer<br/>(Kestrel HTTP/2, port 8083)"]
|
||||
SACC["ClusterClient to Central<br/>(CentralCommunicationActor)"]
|
||||
end
|
||||
|
||||
subgraph SiteB["Site B Cluster"]
|
||||
SBCOMM["SiteCommunicationActor<br/>(via Receptionist)"]
|
||||
SBGRPC["SiteStreamGrpcServer"]
|
||||
end
|
||||
|
||||
subgraph SiteN["Site N Cluster"]
|
||||
SNCOMM["SiteCommunicationActor<br/>(via Receptionist)"]
|
||||
SNGRPC["SiteStreamGrpcServer"]
|
||||
end
|
||||
|
||||
CCA -->|command/control| SACOMM
|
||||
CCB -->|command/control| SBCOMM
|
||||
CCN -->|command/control| SNCOMM
|
||||
|
||||
SAGRPC -->|"gRPC stream (real-time data)"| GRPCC
|
||||
SBGRPC -->|gRPC stream| GRPCC
|
||||
SNGRPC -->|gRPC stream| GRPCC
|
||||
|
||||
SACC -.->|command/control| Central
|
||||
|
||||
NOTE["Sites do NOT communicate with each other.<br/>All inter-cluster communication flows through Central."]
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class CCA,CCB,CCN,SACOMM,SACC,SBCOMM,SNCOMM dec
|
||||
class GRPCC,SAGRPC,SBGRPC,SNGRPC start
|
||||
class NOTE muted
|
||||
class Central proc
|
||||
class SiteA,SiteB,SiteN alt
|
||||
```
|
||||
|
||||
- Sites do **not** communicate with each other.
|
||||
|
||||
@@ -143,14 +143,32 @@ EF Core's DbContext naturally provides unit-of-work semantics:
|
||||
|
||||
### Example Transactional Flow
|
||||
|
||||
```
|
||||
Template Engine: Create Template
|
||||
│
|
||||
├── repository.AddTemplate(template) // template is a Commons POCO
|
||||
├── repository.AddAttributes(attributes) // attributes are Commons POCOs
|
||||
├── repository.AddAlarms(alarms) // alarms are Commons POCOs
|
||||
├── repository.AddScripts(scripts) // scripts are Commons POCOs
|
||||
└── repository.SaveChangesAsync() // single transaction commits all
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
start(["Template Engine: Create Template"])
|
||||
add1["repository.AddTemplate(template)<br/>// template is a Commons POCO"]
|
||||
add2["repository.AddAttributes(attributes)<br/>// attributes are Commons POCOs"]
|
||||
add3["repository.AddAlarms(alarms)<br/>// alarms are Commons POCOs"]
|
||||
add4["repository.AddScripts(scripts)<br/>// scripts are Commons POCOs"]
|
||||
save["repository.SaveChangesAsync()<br/>// single transaction commits all"]
|
||||
db[("Configuration DB<br/>(MS SQL)")]
|
||||
|
||||
start --> add1
|
||||
add1 --> add2
|
||||
add2 --> add3
|
||||
add3 --> add4
|
||||
add4 --> save
|
||||
save -. "single transaction" .-> db
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class start start
|
||||
class add1,add2,add3,add4 proc
|
||||
class save dec
|
||||
class db muted
|
||||
```
|
||||
|
||||
---
|
||||
@@ -184,13 +202,30 @@ Audit entries are written **synchronously** within the same database transaction
|
||||
|
||||
### Integration Example
|
||||
|
||||
```
|
||||
Template Engine: Update Template
|
||||
│
|
||||
├── repository.UpdateTemplate(template)
|
||||
├── auditService.LogAsync(user, "Update", "Template", template.Id,
|
||||
│ template.Name, template)
|
||||
└── repository.SaveChangesAsync() ← both the change and audit entry commit together
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
start(["Template Engine: Update Template"])
|
||||
upd["repository.UpdateTemplate(template)"]
|
||||
audit["auditService.LogAsync(user, 'Update', 'Template',<br/>template.Id, template.Name, template)"]
|
||||
save["repository.SaveChangesAsync()"]
|
||||
note["both the change and audit entry<br/>commit together"]
|
||||
|
||||
start --> upd
|
||||
upd --> audit
|
||||
audit --> save
|
||||
save -.- note
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
class start start
|
||||
class upd proc
|
||||
class audit alt
|
||||
class save dec
|
||||
class note warn
|
||||
```
|
||||
|
||||
### Audit Entry Schema
|
||||
|
||||
@@ -80,11 +80,38 @@ Data connections support an optional backup endpoint for automatic failover when
|
||||
|
||||
**Failover state machine:**
|
||||
|
||||
```
|
||||
Connected → disconnect → push bad quality → retry active endpoint (5s)
|
||||
→ N failures (≥ FailoverRetryCount) → switch to other endpoint
|
||||
→ dispose adapter, create fresh adapter with other config
|
||||
→ reconnect → ReSubscribeAll → Connected
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
connected(["Connected"])
|
||||
pushbad["push bad quality"]
|
||||
retry["retry active endpoint<br/>(5s)"]
|
||||
decide{"N failures<br/>(≥ FailoverRetryCount)?"}
|
||||
switch["switch to other endpoint"]
|
||||
dispose["dispose adapter,<br/>create fresh adapter<br/>with other config"]
|
||||
reconnect["reconnect"]
|
||||
resub["ReSubscribeAll"]
|
||||
|
||||
connected -->|disconnect| pushbad
|
||||
pushbad --> retry
|
||||
retry --> decide
|
||||
decide -->|"no (retry again)"| retry
|
||||
decide -->|yes| switch
|
||||
switch --> dispose
|
||||
dispose --> reconnect
|
||||
reconnect --> resub
|
||||
resub -->|back to Connected| connected
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
class connected start
|
||||
class pushbad bad
|
||||
class retry,reconnect,resub proc
|
||||
class decide dec
|
||||
class switch,dispose warn
|
||||
```
|
||||
|
||||
- **Round-robin**: primary → backup → primary → backup. No preferred endpoint after first failover — the connection stays on whichever endpoint is working.
|
||||
|
||||
@@ -22,23 +22,46 @@ Central cluster only. The site-side deployment responsibilities (receiving confi
|
||||
|
||||
## Deployment Flow
|
||||
|
||||
```
|
||||
Engineer (UI) → Deployment Manager (Central)
|
||||
│
|
||||
├── 1. Request validated + flattened config from Template Engine
|
||||
│ (validation includes flattening, script compilation,
|
||||
│ trigger references, connection binding completeness)
|
||||
├── 2. If validation fails → return errors to UI, stop
|
||||
├── 3. Send config to site via Communication Layer
|
||||
│ │
|
||||
│ ▼
|
||||
│ Site Runtime (Deployment Manager Singleton)
|
||||
│ ├── 4. Store new flattened config locally (SQLite)
|
||||
│ ├── 5. Compile scripts at site
|
||||
│ ├── 6. Create/update Instance Actor (with child Script + Alarm Actors)
|
||||
│ └── 7. Report success/failure back to central
|
||||
│
|
||||
└── 8. Update deployment status in config DB
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
engineer(["Engineer (UI)"])
|
||||
|
||||
subgraph DMC["Deployment Manager (Central)"]
|
||||
step1["1. Request validated and flattened config from Template Engine<br/>(validation: flattening, script compilation, trigger references,<br/>connection binding completeness)"]
|
||||
step2{"2. Validation fails?"}
|
||||
step2fail(["Return errors to UI, stop"])
|
||||
step3["3. Send config to site via Communication Layer"]
|
||||
step8[("8. Update deployment status in config DB")]
|
||||
end
|
||||
|
||||
subgraph SR["Site Runtime (Deployment Manager Singleton)"]
|
||||
step4[("4. Store new flattened config locally (SQLite)")]
|
||||
step5["5. Compile scripts at site"]
|
||||
step6["6. Create/update Instance Actor<br/>(with child Script + Alarm Actors)"]
|
||||
step7["7. Report success/failure back to central"]
|
||||
end
|
||||
|
||||
engineer --> step1
|
||||
step1 --> step2
|
||||
step2 -->|yes| step2fail
|
||||
step2 -->|no| step3
|
||||
step3 -->|config| step4
|
||||
step4 --> step5
|
||||
step5 --> step6
|
||||
step6 --> step7
|
||||
step7 -. "report success/failure" .-> step8
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
class engineer start
|
||||
class step1,step5,step6,step7 dec
|
||||
class step2,step2fail bad
|
||||
class step3 dec
|
||||
class step8 proc
|
||||
class step4 start
|
||||
```
|
||||
|
||||
## Deployment Identity & Idempotency
|
||||
|
||||
@@ -123,19 +123,40 @@ API method scripts are compiled at central startup — all method definitions ar
|
||||
|
||||
## Request Flow
|
||||
|
||||
```
|
||||
External System
|
||||
│
|
||||
▼
|
||||
Inbound API (Central)
|
||||
├── 1. Extract API key from request
|
||||
├── 2. Validate key exists and is enabled
|
||||
├── 3. Resolve method by name
|
||||
├── 4. Check API key is in method's approved list
|
||||
├── 5. Validate and deserialize parameters
|
||||
├── 6. Execute implementation script (subject to method timeout)
|
||||
├── 7. Serialize return value
|
||||
└── 8. Return response
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
ext(["External System"])
|
||||
api["Inbound API (Central)"]
|
||||
s1["1. Extract API key from request"]
|
||||
s2["2. Validate key exists and is enabled"]
|
||||
s3["3. Resolve method by name"]
|
||||
s4["4. Check API key is in method's approved list"]
|
||||
s5["5. Validate and deserialize parameters"]
|
||||
s6["6. Execute implementation script<br/>(subject to method timeout)"]
|
||||
s7["7. Serialize return value"]
|
||||
s8["8. Return response"]
|
||||
|
||||
ext --> api
|
||||
api --> s1
|
||||
s1 --> s2
|
||||
s2 --> s3
|
||||
s3 --> s4
|
||||
s4 --> s5
|
||||
s5 --> s6
|
||||
s6 --> s7
|
||||
s7 --> s8
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
class ext start
|
||||
class api proc
|
||||
class s1,s2,s3,s4,s5,s7 dec
|
||||
class s6 alt
|
||||
class s8 warn
|
||||
```
|
||||
|
||||
## Implementation Script Capabilities
|
||||
|
||||
@@ -24,23 +24,39 @@ SMTP and HTTP delivery is blocking I/O. Delivery work runs on a **dedicated bloc
|
||||
|
||||
## End-to-End Flow
|
||||
|
||||
```
|
||||
Site script: Notify.To("list").Send(subject, body)
|
||||
│ generate NotificationId (GUID) locally; return it to the script immediately
|
||||
▼
|
||||
Site Store-and-Forward Engine (notification category, target = central)
|
||||
│ durably forwards to central via Central–Site Communication (ClusterClient);
|
||||
│ buffers/retries if central is unreachable
|
||||
▼
|
||||
Central ingest: insert-if-not-exists on NotificationId → Notifications table (Pending)
|
||||
│ ack the site → site S&F clears the message
|
||||
▼
|
||||
Central Notification Outbox actor (singleton, active central node)
|
||||
│ polls due rows; resolves the list; delivers via the matching adapter
|
||||
├── success → Delivered
|
||||
├── transient failure → Retrying (schedule NextAttemptAt)
|
||||
└── permanent failure
|
||||
/ retries exhausted → Parked
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
SCRIPT(["Site script: Notify.To('list').Send(subject, body)<br/>generate NotificationId (GUID) locally;<br/>return it to the script immediately"])
|
||||
SNF["Site Store-and-Forward Engine<br/>(notification category, target = central)<br/>durably forwards to central via Central-Site Communication<br/>(ClusterClient); buffers/retries if central is unreachable"]
|
||||
INGEST[("Central ingest: insert-if-not-exists on NotificationId<br/>to Notifications table (Pending)<br/>ack the site, site Store-and-Forward clears the message")]
|
||||
OUTBOX["Central Notification Outbox actor<br/>(singleton, active central node)<br/>polls due rows; resolves the list;<br/>delivers via the matching adapter"]
|
||||
D1{Delivery outcome}
|
||||
DELIVERED(["Delivered"])
|
||||
RETRYING["Retrying<br/>(schedule NextAttemptAt)"]
|
||||
PARKED(["Parked"])
|
||||
|
||||
SCRIPT --> SNF
|
||||
SNF --> INGEST
|
||||
INGEST --> OUTBOX
|
||||
OUTBOX --> D1
|
||||
D1 -->|success| DELIVERED
|
||||
D1 -->|transient failure| RETRYING
|
||||
D1 -->|"permanent failure /<br/>retries exhausted"| PARKED
|
||||
RETRYING -.->|retry due| OUTBOX
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
class SCRIPT,DELIVERED start
|
||||
class SNF warn
|
||||
class INGEST proc
|
||||
class OUTBOX alt
|
||||
class D1,RETRYING dec
|
||||
class PARKED bad
|
||||
```
|
||||
|
||||
The site forwards only `(listName, subject, body)` plus provenance — recipient resolution happens at central, at delivery time. This keeps notification-list definitions in one place and removes the deploy-to-sites artifact entirely.
|
||||
|
||||
@@ -27,20 +27,56 @@ Site clusters only.
|
||||
|
||||
## Actor Hierarchy
|
||||
|
||||
```
|
||||
Deployment Manager Singleton (Cluster Singleton)
|
||||
├── Instance Actor ("MachineA-001")
|
||||
│ ├── Script Actor ("MonitorSpeed") — coordinator
|
||||
│ │ └── Script Execution Actor — short-lived, per invocation
|
||||
│ ├── Script Actor ("CalculateOEE") — coordinator
|
||||
│ │ └── Script Execution Actor — short-lived, per invocation
|
||||
│ ├── Alarm Actor ("OverTemp") — coordinator (computed)
|
||||
│ │ └── Alarm Execution Actor — short-lived, per on-trigger invocation
|
||||
│ ├── Alarm Actor ("LowPressure") — coordinator (computed)
|
||||
│ └── Native Alarm Actor ("OpcUaServer1") — read-only mirror, peer to Alarm Actor
|
||||
├── Instance Actor ("MachineA-002")
|
||||
│ └── ...
|
||||
└── ...
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
DMS["Deployment Manager Singleton<br/>(Cluster Singleton)"]
|
||||
IA1["Instance Actor<br/>('MachineA-001')"]
|
||||
IA2["Instance Actor<br/>('MachineA-002')"]
|
||||
IAMORE["… more Instance Actors"]
|
||||
|
||||
SA1["Script Actor ('MonitorSpeed')<br/>— coordinator"]
|
||||
SA2["Script Actor ('CalculateOEE')<br/>— coordinator"]
|
||||
AA1["Alarm Actor ('OverTemp')<br/>— coordinator (computed)"]
|
||||
AA2["Alarm Actor ('LowPressure')<br/>— coordinator (computed)"]
|
||||
NAA1["Native Alarm Actor ('OpcUaServer1')<br/>— read-only mirror, peer to Alarm Actor"]
|
||||
|
||||
SEA1["Script Execution Actor<br/>— short-lived, per invocation"]
|
||||
SEA2["Script Execution Actor<br/>— short-lived, per invocation"]
|
||||
AEA1["Alarm Execution Actor<br/>— short-lived, per on-trigger invocation"]
|
||||
|
||||
IA2CHILD["… (Script / Alarm Actors)"]
|
||||
|
||||
DMS --> IA1
|
||||
DMS --> IA2
|
||||
DMS -.-> IAMORE
|
||||
|
||||
IA1 --> SA1
|
||||
IA1 --> SA2
|
||||
IA1 --> AA1
|
||||
IA1 --> AA2
|
||||
IA1 --> NAA1
|
||||
|
||||
SA1 --> SEA1
|
||||
SA2 --> SEA2
|
||||
AA1 --> AEA1
|
||||
|
||||
IA2 -.-> IA2CHILD
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class DMS proc
|
||||
class IA1,IA2 start
|
||||
class SA1,SA2 dec
|
||||
class AA1,AA2 bad
|
||||
class NAA1 alt
|
||||
class SEA1,SEA2,AEA1 warn
|
||||
class IAMORE,IA2CHILD muted
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -25,22 +25,28 @@ Site clusters only. The central cluster does not buffer messages.
|
||||
|
||||
## Message Lifecycle
|
||||
|
||||
```
|
||||
Script submits message
|
||||
│
|
||||
▼
|
||||
Attempt immediate delivery
|
||||
│
|
||||
├── Success → Remove from buffer
|
||||
│
|
||||
└── Failure → Buffer message
|
||||
│
|
||||
▼
|
||||
Retry loop (per retry policy)
|
||||
│
|
||||
├── Success → Remove from buffer + notify standby
|
||||
│
|
||||
└── Max retries exhausted → Park message
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
A([Script submits message]) --> B[Attempt immediate delivery]
|
||||
B --> C{Delivered?}
|
||||
C -->|Success| D([Remove from buffer])
|
||||
C -->|Failure| E[Buffer message]
|
||||
E --> F[Retry loop<br/>per retry policy]
|
||||
F --> G{Retry outcome}
|
||||
G -->|Success| H([Remove from buffer<br/>+ notify standby])
|
||||
G -->|Max retries exhausted| I([Park message<br/>dead-letter])
|
||||
|
||||
classDef ok fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef buf fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef bad fill:#f8cecc,stroke:#b85450,color:#111111;
|
||||
class A,D,H ok
|
||||
class B,F proc
|
||||
class C,G dec
|
||||
class E buf
|
||||
class I bad
|
||||
```
|
||||
|
||||
For notifications, "delivery" means forwarding the message to the central cluster via Central–Site Communication; "success" is central's ack, on which the message is cleared. Notifications are retried at the fixed forward interval until central acks, but — like every other category — they are bounded by the engine's `DefaultMaxRetries` cap: a sustained central outage that exceeds `DefaultMaxRetries × forward-interval` will park the buffered notification, after which an operator can Retry/Discard it via the parked-message UI. Operationally, the cap is sized so the normal central-recovery window stays well inside it; "do not park" is the design's operational intent on the happy path, not an absolute invariant. Callers that genuinely require unbounded retry pass `maxRetries: 0` on `EnqueueAsync` (the documented "no limit" escape hatch — see `StoreAndForward-015`).
|
||||
|
||||
@@ -92,19 +92,31 @@ The manifest is plaintext so the import wizard can preview bundle contents and s
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
ZB.MOM.WW.ScadaBridge.Transport
|
||||
├── IBundleExporter
|
||||
│ ExportAsync(ExportSelection, Passphrase?, ct) → Stream
|
||||
├── IBundleImporter
|
||||
│ LoadAsync(stream, Passphrase?, ct) → BundleSession
|
||||
│ PreviewAsync(sessionId, ct) → ImportPreview
|
||||
│ ApplyAsync(sessionId, resolutions, ct) → ImportResult
|
||||
├── DependencyResolver
|
||||
├── BundleSerializer (manifest + content JSON; ZIP packer)
|
||||
├── BundleSecretEncryptor (AES-256-GCM + PBKDF2)
|
||||
├── BundleSessionStore (in-memory, TTL'd)
|
||||
└── ManifestValidator (schema/version gating, hash check)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
subgraph T["ZB.MOM.WW.ScadaBridge.Transport"]
|
||||
EXPORTER["IBundleExporter<br/>ExportAsync(ExportSelection, Passphrase?, ct) → Stream"]
|
||||
IMPORTER["IBundleImporter<br/>LoadAsync(stream, Passphrase?, ct) → BundleSession<br/>PreviewAsync(sessionId, ct) → ImportPreview<br/>ApplyAsync(sessionId, resolutions, ct) → ImportResult"]
|
||||
RESOLVER["DependencyResolver"]
|
||||
SERIALIZER["BundleSerializer<br/>(manifest + content JSON; ZIP packer)"]
|
||||
ENCRYPTOR["BundleSecretEncryptor<br/>(AES-256-GCM + PBKDF2)"]
|
||||
SESSIONSTORE["BundleSessionStore<br/>(in-memory, TTL'd)"]
|
||||
MANIFESTVALIDATOR["ManifestValidator<br/>(schema/version gating, hash check)"]
|
||||
end
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class EXPORTER,IMPORTER proc
|
||||
class RESOLVER,SERIALIZER start
|
||||
class ENCRYPTOR alt
|
||||
class SESSIONSTORE warn
|
||||
class MANIFESTVALIDATOR dec
|
||||
class T muted
|
||||
```
|
||||
|
||||
The component is central-only. It is registered in `ZB.MOM.WW.ScadaBridge.Host` for central roles only, never for site roles. All persistence flows through existing audited repository interfaces in `ZB.MOM.WW.ScadaBridge.ConfigurationDatabase` — the component does not call `DbContext.SaveChangesAsync` directly. `BundleSessionStore` is in-process on the active central node (matching Blazor Server circuit affinity): 30-minute TTL, eviction on expiry, 3-strike passphrase lockout per session.
|
||||
@@ -132,21 +144,49 @@ The user can toggle "include all dependencies" off (with a warning that the bund
|
||||
|
||||
### Backend
|
||||
|
||||
```
|
||||
User (Design role) ─► Central UI Export wizard
|
||||
│
|
||||
▼
|
||||
IBundleExporter
|
||||
│
|
||||
├─► DependencyResolver ─► repositories (read)
|
||||
├─► EntitySerializer ─► content.json
|
||||
├─► BundleSecretEncryptor ► content.enc (if passphrase)
|
||||
├─► ManifestBuilder ─► manifest.json
|
||||
▼
|
||||
ZIP packer → temp file → browser download
|
||||
│
|
||||
▼
|
||||
IAuditService.LogAsync(BundleExported …)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
USER(["User (Design role)"])
|
||||
WIZARD["Central UI Export wizard"]
|
||||
EXPORTER["IBundleExporter"]
|
||||
RESOLVER["DependencyResolver"]
|
||||
REPOS[("repositories (read)")]
|
||||
SERIALIZER["EntitySerializer"]
|
||||
CONTENTJSON["content.json"]
|
||||
ENCRYPTOR["BundleSecretEncryptor"]
|
||||
CONTENTENC["content.enc<br/>(if passphrase)"]
|
||||
MANIFESTBUILDER["ManifestBuilder"]
|
||||
MANIFESTJSON["manifest.json"]
|
||||
ZIP["ZIP packer → temp file → browser download"]
|
||||
AUDIT["IAuditService.LogAsync(BundleExported …)"]
|
||||
|
||||
USER --> WIZARD
|
||||
WIZARD --> EXPORTER
|
||||
EXPORTER --> RESOLVER
|
||||
RESOLVER --> SERIALIZER
|
||||
SERIALIZER --> ENCRYPTOR
|
||||
ENCRYPTOR --> MANIFESTBUILDER
|
||||
MANIFESTBUILDER --> ZIP
|
||||
ZIP --> AUDIT
|
||||
|
||||
RESOLVER --> REPOS
|
||||
SERIALIZER --> CONTENTJSON
|
||||
ENCRYPTOR --> CONTENTENC
|
||||
MANIFESTBUILDER --> MANIFESTJSON
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class USER,AUDIT start
|
||||
class WIZARD,EXPORTER,ZIP proc
|
||||
class RESOLVER,SERIALIZER,MANIFESTBUILDER dec
|
||||
class ENCRYPTOR alt
|
||||
class CONTENTJSON,CONTENTENC,MANIFESTJSON warn
|
||||
class REPOS muted
|
||||
```
|
||||
|
||||
Audit event: `BundleExported` — caller, artifact count, content hash, encrypted yes/no, bundle filename.
|
||||
@@ -179,34 +219,36 @@ Bundle references that cannot be satisfied in either the bundle or the target DB
|
||||
|
||||
### Backend
|
||||
|
||||
```
|
||||
User (Admin role) ─► uploads bundle
|
||||
│
|
||||
▼
|
||||
IBundleImporter.LoadAsync
|
||||
· verify SHA-256 (manifest vs content)
|
||||
· check bundleFormatVersion supported
|
||||
· decrypt content.enc with passphrase (if encrypted)
|
||||
· deserialize entities
|
||||
· open BundleSession (30-min TTL)
|
||||
│
|
||||
▼
|
||||
PreviewAsync → diff vs target DB → ImportPreview
|
||||
│
|
||||
▼ (user reviews + resolves conflicts)
|
||||
│
|
||||
ApplyAsync (single EF transaction)
|
||||
· run two-tier semantic validation (minimal name scan + full SemanticValidator)
|
||||
· apply resolutions (add / overwrite / skip / rename)
|
||||
· upsert TemplateFolder hierarchy
|
||||
· IAuditService.LogAsync(BundleImported …)
|
||||
· commit
|
||||
│
|
||||
▼
|
||||
ImportResult → UI step 5
|
||||
│
|
||||
▼
|
||||
"View on Deployments →" (existing page)
|
||||
```mermaid
|
||||
%%{init: {'theme':'base', 'themeVariables': {'textColor':'#111111','lineColor':'#555555','edgeLabelBackground':'#ffffff','fontSize':'15px'}}}%%
|
||||
flowchart TD
|
||||
USER(["User (Admin role) → uploads bundle"])
|
||||
LOAD["IBundleImporter.LoadAsync<br/>· verify SHA-256 (manifest vs content)<br/>· check bundleFormatVersion supported<br/>· decrypt content.enc with passphrase (if encrypted)<br/>· deserialize entities<br/>· open BundleSession (30-min TTL)"]
|
||||
PREVIEW["PreviewAsync → diff vs target DB → ImportPreview"]
|
||||
REVIEW["(user reviews + resolves conflicts)"]
|
||||
APPLY["ApplyAsync (single EF transaction)<br/>· run two-tier semantic validation<br/> (minimal name scan + full SemanticValidator)<br/>· apply resolutions (add / overwrite / skip / rename)<br/>· upsert TemplateFolder hierarchy<br/>· IAuditService.LogAsync(BundleImported …)<br/>· commit"]
|
||||
RESULT["ImportResult → UI step 5"]
|
||||
DEPLOYMENTS["'View on Deployments →' (existing page)"]
|
||||
|
||||
USER --> LOAD
|
||||
LOAD --> PREVIEW
|
||||
PREVIEW --> APPLY
|
||||
PREVIEW -.- REVIEW
|
||||
APPLY --> RESULT
|
||||
RESULT --> DEPLOYMENTS
|
||||
|
||||
classDef start fill:#d5e8d4,stroke:#82b366,color:#111111;
|
||||
classDef proc fill:#dae8fc,stroke:#6c8ebf,color:#111111;
|
||||
classDef dec fill:#fff2cc,stroke:#d6b656,color:#111111;
|
||||
classDef warn fill:#ffe6cc,stroke:#d79b00,color:#111111;
|
||||
classDef alt fill:#e1d5e7,stroke:#9673a6,color:#111111;
|
||||
classDef muted fill:#f5f5f5,stroke:#999999,color:#666666;
|
||||
class USER start
|
||||
class LOAD,RESULT proc
|
||||
class PREVIEW dec
|
||||
class APPLY alt
|
||||
class DEPLOYMENTS warn
|
||||
class REVIEW muted
|
||||
```
|
||||
|
||||
Authorization: `RequireAdmin` on both the Razor page and `IBundleImporter.*` entrypoints.
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
</packageSource>
|
||||
<packageSource key="dohertj2-gitea">
|
||||
<package pattern="ZB.MOM.WW.MxGateway.*" />
|
||||
<package pattern="ZB.MOM.WW.Health" />
|
||||
<package pattern="ZB.MOM.WW.Health.*" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
<!--
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that passes only on the node that is currently the leader of the
/// Akka.NET cluster (the "active" node). Used by Traefik to route traffic to the active node.
/// </summary>
public class ActiveNodeHealthCheck : IHealthCheck
{
    private readonly AkkaHostedService _akkaService;

    /// <summary>Creates a new <see cref="ActiveNodeHealthCheck"/> bound to the given Akka hosted service.</summary>
    /// <param name="akkaService">Hosted service whose actor system and cluster state are inspected.</param>
    public ActiveNodeHealthCheck(AkkaHostedService akkaService) => _akkaService = akkaService;

    /// <summary>
    /// Reports healthy when this node is Up and is the cluster leader; reports unhealthy
    /// when the actor system is missing, the node is not Up, or another node is the leader.
    /// </summary>
    /// <param name="context">Health check context providing registration details (not read here).</param>
    /// <param name="cancellationToken">Cancellation token (not observed; the check completes synchronously).</param>
    /// <returns>An already-completed task carrying the health result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        return Task.FromResult(Evaluate());
    }

    // Computes the result synchronously; the public entry point only wraps it in a Task.
    private HealthCheckResult Evaluate()
    {
        var actorSystem = _akkaService.ActorSystem;
        if (actorSystem == null)
        {
            return HealthCheckResult.Unhealthy("ActorSystem not yet available.");
        }

        var cluster = Cluster.Get(actorSystem);
        var selfMember = cluster.SelfMember;
        if (selfMember.Status != MemberStatus.Up)
        {
            return HealthCheckResult.Unhealthy($"Node not Up (status: {selfMember.Status}).");
        }

        // A missing leader is treated the same as "some other node is leader": standby.
        var leaderAddress = cluster.State.Leader;
        var isLeader = leaderAddress != null && leaderAddress == selfMember.Address;

        return isLeader
            ? HealthCheckResult.Healthy("Active node (cluster leader).")
            : HealthCheckResult.Unhealthy("Standby node (not cluster leader).");
    }
}
|
||||
@@ -1,52 +0,0 @@
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that reports this node's membership state in the Akka.NET cluster.
/// Up and Joining map to healthy, Leaving and Exiting map to degraded, and any other
/// member status maps to unhealthy. A missing actor system is reported as degraded.
/// </summary>
public class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly AkkaHostedService _akkaService;

    /// <summary>
    /// Creates the check around the hosted service that owns the actor system.
    /// </summary>
    /// <param name="akkaService">Hosted service whose <c>ActorSystem</c> property is read on every check.</param>
    public AkkaClusterHealthCheck(AkkaHostedService akkaService) => _akkaService = akkaService;

    /// <summary>
    /// Maps the current self-member status of the cluster to a health result.
    /// </summary>
    /// <param name="context">Health check context (not consulted by this check).</param>
    /// <param name="cancellationToken">Cancellation token (not consulted; the check completes synchronously).</param>
    /// <returns>An already-completed task carrying the result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
        => Task.FromResult(Evaluate());

    /// <summary>Computes the result synchronously from the current self-member status.</summary>
    private HealthCheckResult Evaluate()
    {
        var actorSystem = _akkaService.ActorSystem;
        if (actorSystem == null)
        {
            return HealthCheckResult.Degraded("ActorSystem not yet available.");
        }

        var memberStatus = Cluster.Get(actorSystem).SelfMember.Status;

        // Every outcome carries the same description; only the severity differs.
        var description = $"Akka cluster member status: {memberStatus}";

        switch (memberStatus)
        {
            case MemberStatus.Up:
            case MemberStatus.Joining:
                return HealthCheckResult.Healthy(description);

            case MemberStatus.Leaving:
            case MemberStatus.Exiting:
                return HealthCheckResult.Degraded(description);

            default:
                return HealthCheckResult.Unhealthy(description);
        }
    }
}
|
||||
@@ -1,43 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that verifies database connectivity for Central nodes by asking EF Core
/// whether the configured database can be reached.
/// </summary>
public class DatabaseHealthCheck : IHealthCheck
{
    private readonly ScadaBridgeDbContext _dbContext;

    /// <summary>
    /// Initializes a new <see cref="DatabaseHealthCheck"/>.
    /// </summary>
    /// <param name="dbContext">The EF Core database context used to test connectivity.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dbContext"/> is null.</exception>
    public DatabaseHealthCheck(ScadaBridgeDbContext dbContext)
    {
        _dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
    }

    /// <summary>
    /// Checks database connectivity via <c>Database.CanConnectAsync</c>.
    /// </summary>
    /// <param name="context">
    /// Health check context. Its registration's failure status is the status reported when the
    /// database is unreachable; when no registration is present, Unhealthy is used.
    /// </param>
    /// <param name="cancellationToken">Cancellation token passed through to the connectivity probe.</param>
    /// <returns>
    /// Healthy when the probe succeeds; otherwise a result carrying the registered failure status
    /// (Unhealthy by default), including the exception when the probe threw.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        // Honour the failure status chosen at registration time. A registration that does not
        // set one yields Unhealthy, so existing registrations behave exactly as before. The
        // null-conditional chain also keeps a context built without a registration working.
        var failureStatus = context?.Registration?.FailureStatus ?? HealthStatus.Unhealthy;

        try
        {
            var canConnect = await _dbContext.Database.CanConnectAsync(cancellationToken);
            return canConnect
                ? HealthCheckResult.Healthy("Database connection is available.")
                : new HealthCheckResult(failureStatus, "Database connection failed.");
        }
        catch (Exception ex)
        {
            // Any failure of the probe itself is reported as a failed check, not rethrown.
            return new HealthCheckResult(failureStatus, "Database connection failed.", ex);
        }
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
using HealthChecks.UI.Client;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
||||
using ZB.MOM.WW.ScadaBridge.CentralUI;
|
||||
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
||||
@@ -110,16 +111,37 @@ try
|
||||
?? throw new InvalidOperationException("ScadaBridge:Database:ConfigurationDb connection string is required for Central role.");
|
||||
builder.Services.AddConfigurationDatabase(configDbConnectionString);
|
||||
|
||||
// WP-12: Health checks for readiness gating
|
||||
// WP-12: Health checks for readiness gating — shared ZB.MOM.WW.Health probes.
|
||||
// Check names and the ready/active tier split are preserved: database + akka-cluster
|
||||
// carry the Ready tag (/health/ready), active-node carries the Active tag (/health/active).
|
||||
// The Akka checks resolve ActorSystem from DI via the transient bridge registered below;
|
||||
// the DatabaseHealthCheck<TContext> resolves a scoped ScadaBridgeDbContext (no factory).
|
||||
builder.Services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("database")
|
||||
.AddCheck<AkkaClusterHealthCheck>("akka-cluster")
|
||||
.AddCheck<ActiveNodeHealthCheck>("active-node");
|
||||
.AddTypeActivatedCheck<DatabaseHealthCheck<ScadaBridgeDbContext>>(
|
||||
"database",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready })
|
||||
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
|
||||
"akka-cluster",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready },
|
||||
args: AkkaClusterStatusPolicy.Default)
|
||||
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
|
||||
"active-node",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Active });
|
||||
|
||||
// WP-13: Akka.NET bootstrap via hosted service
|
||||
builder.Services.AddSingleton<AkkaHostedService>();
|
||||
builder.Services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
||||
|
||||
// The shared ZB.MOM.WW.Health Akka checks resolve ActorSystem from DI. ScadaBridge owns the
|
||||
// ActorSystem inside AkkaHostedService (not a DI singleton), so bridge it as TRANSIENT: each
|
||||
// resolve re-reads the current value — null while warming up (checks → Degraded), live after.
|
||||
// The factory must NOT throw: GetService<ActorSystem>() must return null (not raise) pre-start.
|
||||
builder.Services.AddTransient<Akka.Actor.ActorSystem>(sp =>
|
||||
sp.GetRequiredService<AkkaHostedService>().ActorSystem!);
|
||||
|
||||
// InboundAPI-022: register the production IActiveNodeGate implementation so
|
||||
// standby-node gating is actually enforced (the InboundApiEndpointFilter
|
||||
// consults IActiveNodeGate and defaults to "allow" when none is registered,
|
||||
@@ -214,23 +236,17 @@ try
|
||||
&& HttpMethods.IsPost(ctx.Request.Method),
|
||||
branch => branch.UseAuditWriteMiddleware());
|
||||
|
||||
// WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
|
||||
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
|
||||
// explicitly NOT cluster leadership. The leader-only "active-node" check is
|
||||
// excluded here so a fully operational standby central node reports ready;
|
||||
// leadership is reported separately on /health/active.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = check => check.Name != "active-node",
|
||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||
});
|
||||
|
||||
// Active node endpoint — returns 200 only on the cluster leader; used by Traefik for routing
|
||||
app.MapHealthChecks("/health/active", new HealthCheckOptions
|
||||
{
|
||||
Predicate = check => check.Name == "active-node",
|
||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||
});
|
||||
// WP-12: Map the canonical three-tier health endpoints in one call:
|
||||
// /health/ready — Ready-tagged checks (database + akka-cluster). REQ-HOST-4a defines
|
||||
// readiness as cluster membership + DB connectivity, explicitly NOT
|
||||
// cluster leadership, so the leader-only active-node check is excluded
|
||||
// (a fully operational standby central node still reports ready).
|
||||
// /health/active — Active-tagged check (active-node); returns 200 only on the cluster
|
||||
// leader; used by Traefik for routing.
|
||||
// /healthz — bare process liveness; runs no checks (always 200 while the process
|
||||
// is up). New tier added by adopting the shared library.
|
||||
// All three are anonymous and use the canonical ZbHealthWriter JSON output.
|
||||
app.MapZbHealth();
|
||||
|
||||
app.MapStaticAssets();
|
||||
app.MapCentralUI<ZB.MOM.WW.ScadaBridge.Host.Components.App>();
|
||||
|
||||
@@ -73,6 +73,13 @@ public static class SiteServiceRegistration
|
||||
services.AddSingleton<AkkaHostedService>();
|
||||
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
||||
|
||||
// The shared ZB.MOM.WW.Health Akka checks resolve ActorSystem from DI. ScadaBridge owns the
|
||||
// ActorSystem inside AkkaHostedService (not a DI singleton), so bridge it as TRANSIENT: each
|
||||
// resolve re-reads the current value — null while warming up (checks → Degraded), live after.
|
||||
// The factory must NOT throw: GetService<ActorSystem>() must return null (not raise) pre-start.
|
||||
services.AddTransient<Akka.Actor.ActorSystem>(sp =>
|
||||
sp.GetRequiredService<AkkaHostedService>().ActorSystem!);
|
||||
|
||||
// Cluster node status provider for health reports
|
||||
services.AddSingleton<IClusterNodeProvider>(sp =>
|
||||
{
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
<PackageReference Include="Akka.Cluster.Tools" />
|
||||
<PackageReference Include="Akka.Hosting" />
|
||||
<PackageReference Include="Akka.Remote.Hosting" />
|
||||
<PackageReference Include="AspNetCore.HealthChecks.UI.Client" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
@@ -29,6 +28,9 @@
|
||||
<!-- Transitive override: Akka.Hosting 1.5.62 pins OpenTelemetry.Api 1.9.0 which is flagged
|
||||
(GHSA-g94r-2vxg-569j, GHSA-8785-wc3w-h8q6). Bumping directly clears both advisories. -->
|
||||
<PackageReference Include="OpenTelemetry.Api" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.Akka" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.EntityFrameworkCore" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||
using ZB.MOM.WW.ScadaBridge.Host;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Tests;
|
||||
|
||||
/// <summary>
/// Covers the DI bridge that hands out the Akka <see cref="ActorSystem"/> held by
/// <see cref="AkkaHostedService"/> to consumers that resolve <c>ActorSystem</c> from the
/// container (notably the shared ZB.MOM.WW.Health Akka checks). The bridge is a TRANSIENT
/// factory, so every resolve re-reads the hosted service's current value: null while the
/// hosted service is warming up (checks treat that as Degraded), the live system afterwards.
/// A SINGLETON would cache the startup-time null forever.
/// </summary>
public sealed class ActorSystemBridgeTests
{
    [Fact]
    public void ActorSystem_ResolvesNull_BeforeHostedServiceStarts()
    {
        // Arrange: register AkkaHostedService the same way Program.cs does, with the minimal
        // constructor dependencies the container needs to build it, plus the transient
        // bridge under test. StartAsync is never invoked, so ActorSystem stays null.
        var registrations = new ServiceCollection()
            .AddSingleton(Options.Create(new NodeOptions()))
            .AddSingleton(Options.Create(new ClusterOptions()))
            .AddSingleton(Options.Create(new CommunicationOptions()))
            .AddSingleton<ILogger<AkkaHostedService>>(NullLogger<AkkaHostedService>.Instance)
            .AddSingleton<AkkaHostedService>()
            .AddTransient<ActorSystem>(sp =>
                sp.GetRequiredService<AkkaHostedService>().ActorSystem!);

        using var provider = registrations.BuildServiceProvider();

        // Act
        var resolved = provider.GetService<ActorSystem>();

        // Assert: the hosted service has not started, so the bridge yields null (no throw).
        Assert.Null(resolved);
    }
}
|
||||
@@ -1,11 +1,20 @@
|
||||
using System.Linq;
|
||||
using Microsoft.AspNetCore.Mvc.Testing;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.Health;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// WP-12: Tests for /health/ready and /health/active endpoints.
|
||||
/// WP-12: Tests for the three-tier health endpoints after adopting the shared
|
||||
/// ZB.MOM.WW.Health probes. Verifies that /health/ready, /health/active and the new
|
||||
/// /healthz tier are mapped, and that the readiness/active tier split is now carried by
|
||||
/// the canonical <see cref="ZbHealthTags"/> (Ready for database + akka-cluster, Active for
|
||||
/// active-node) rather than by check-name predicates. These are pure route/tag assertions
|
||||
/// — they require no database, LDAP, or formed Akka cluster.
|
||||
/// </summary>
|
||||
public class HealthCheckTests : IDisposable
|
||||
{
|
||||
@@ -25,41 +34,49 @@ public class HealthCheckTests : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
private WebApplicationFactory<Program> CreateCentralFactory()
|
||||
{
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaBridge:Node:NodeHostname"] = "localhost",
|
||||
["ScadaBridge:Node:RemotingPort"] = "0",
|
||||
["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
|
||||
["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
|
||||
["ScadaBridge:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaBridge:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
return factory;
|
||||
}
|
||||
|
||||
private static IEnumerable<HealthCheckRegistration> Registrations(WebApplicationFactory<Program> factory) =>
|
||||
factory.Services.GetRequiredService<IOptions<HealthCheckServiceOptions>>().Value.Registrations;
|
||||
|
||||
[Fact]
|
||||
public async Task HealthReady_Endpoint_ReturnsResponse()
|
||||
public async Task HealthReady_Endpoint_IsMapped()
|
||||
{
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaBridge:Node:NodeHostname"] = "localhost",
|
||||
["ScadaBridge:Node:RemotingPort"] = "0",
|
||||
["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
|
||||
["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
|
||||
["ScadaBridge:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaBridge:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
|
||||
var factory = CreateCentralFactory();
|
||||
var client = factory.CreateClient();
|
||||
_disposables.Add(client);
|
||||
|
||||
var response = await client.GetAsync("/health/ready");
|
||||
|
||||
// The endpoint exists and returns a status code.
|
||||
// With test infrastructure (no real DB), the database check may fail,
|
||||
// so we accept either 200 (Healthy) or 503 (Unhealthy).
|
||||
// The endpoint exists and returns a status code. With test infrastructure
|
||||
// (no real DB / cluster) the readiness checks may report Unhealthy, so we
|
||||
// accept either 200 (Healthy/Degraded) or 503 (Unhealthy) — never 404.
|
||||
Assert.NotEqual(System.Net.HttpStatusCode.NotFound, response.StatusCode);
|
||||
Assert.True(
|
||||
response.StatusCode == System.Net.HttpStatusCode.OK ||
|
||||
response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable,
|
||||
@@ -72,39 +89,19 @@ public class HealthCheckTests : IDisposable
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthActive_Endpoint_ReturnsResponse()
|
||||
public async Task HealthActive_Endpoint_IsMapped()
|
||||
{
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaBridge:Node:NodeHostname"] = "localhost",
|
||||
["ScadaBridge:Node:RemotingPort"] = "0",
|
||||
["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
|
||||
["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
|
||||
["ScadaBridge:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaBridge:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
|
||||
var factory = CreateCentralFactory();
|
||||
var client = factory.CreateClient();
|
||||
_disposables.Add(client);
|
||||
|
||||
var response = await client.GetAsync("/health/active");
|
||||
|
||||
// In test mode, the ActorSystem may not be fully available,
|
||||
// so the active-node check returns 503 (Unhealthy).
|
||||
Assert.NotEqual(System.Net.HttpStatusCode.NotFound, response.StatusCode);
|
||||
Assert.True(
|
||||
response.StatusCode == System.Net.HttpStatusCode.OK ||
|
||||
response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable,
|
||||
@@ -117,46 +114,21 @@ public class HealthCheckTests : IDisposable
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck()
|
||||
public async Task Healthz_LivenessEndpoint_IsMappedAndReturns200()
|
||||
{
|
||||
// Host-001 regression: /health/ready must reflect cluster membership + DB
|
||||
// connectivity only (REQ-HOST-4a), NOT cluster leadership. The leader-only
|
||||
// "active-node" check belongs solely to /health/active. If /health/ready
|
||||
// included "active-node", a fully operational standby central node would
|
||||
// permanently report 503, breaking load-balancer failover readiness.
|
||||
// New tier added by adopting the shared library: /healthz runs no checks, so it
|
||||
// returns 200 as long as the process is up — independent of DB / cluster state.
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaBridge:Node:NodeHostname"] = "localhost",
|
||||
["ScadaBridge:Node:RemotingPort"] = "0",
|
||||
["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
|
||||
["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
|
||||
["ScadaBridge:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaBridge:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
|
||||
var factory = CreateCentralFactory();
|
||||
var client = factory.CreateClient();
|
||||
_disposables.Add(client);
|
||||
|
||||
var response = await client.GetAsync("/health/ready");
|
||||
var body = await response.Content.ReadAsStringAsync();
|
||||
var response = await client.GetAsync("/healthz");
|
||||
|
||||
// The readiness body lists each executed check by name in its entries map.
|
||||
// The leader-only "active-node" check must not be among them.
|
||||
Assert.DoesNotContain("active-node", body);
|
||||
Assert.Equal(System.Net.HttpStatusCode.OK, response.StatusCode);
|
||||
}
|
||||
finally
|
||||
{
|
||||
@@ -165,43 +137,54 @@ public class HealthCheckTests : IDisposable
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
|
||||
public void ReadyTier_Carries_Database_And_AkkaCluster()
|
||||
{
|
||||
// AkkaHostedService before StartAsync has ActorSystem == null.
|
||||
// The integration test (HealthActive_Endpoint_ReturnsResponse) validates the full
|
||||
// endpoint wiring. This test validates the null-system path via WebApplicationFactory
|
||||
// where the ActorSystem may not be available.
|
||||
// Host-001 regression guard: readiness reflects cluster membership + DB connectivity
|
||||
// only (REQ-HOST-4a), NOT cluster leadership. The split is now carried by the Ready tag
|
||||
// rather than a check-name predicate: database + akka-cluster are Ready-tagged, and the
|
||||
// leader-only active-node check is NOT — so a fully operational standby central node
|
||||
// still reports ready on /health/ready.
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
var factory = new WebApplicationFactory<Program>()
|
||||
.WithWebHostBuilder(builder =>
|
||||
{
|
||||
builder.ConfigureAppConfiguration((context, config) =>
|
||||
{
|
||||
config.AddInMemoryCollection(new Dictionary<string, string?>
|
||||
{
|
||||
["ScadaBridge:Node:NodeHostname"] = "localhost",
|
||||
["ScadaBridge:Node:RemotingPort"] = "0",
|
||||
["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
|
||||
["ScadaBridge:Database:SkipMigrations"] = "true",
|
||||
});
|
||||
});
|
||||
builder.UseSetting("ScadaBridge:Node:Role", "Central");
|
||||
builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
|
||||
});
|
||||
_disposables.Add(factory);
|
||||
var factory = CreateCentralFactory();
|
||||
|
||||
var client = factory.CreateClient();
|
||||
_disposables.Add(client);
|
||||
var registrations = Registrations(factory).ToDictionary(r => r.Name);
|
||||
|
||||
var response = await client.GetAsync("/health/active");
|
||||
var body = await response.Content.ReadAsStringAsync();
|
||||
Assert.True(registrations.ContainsKey("database"), "Expected a 'database' health check.");
|
||||
Assert.True(registrations.ContainsKey("akka-cluster"), "Expected an 'akka-cluster' health check.");
|
||||
|
||||
// Active-node check returns 503 when ActorSystem is not yet available or not leader
|
||||
Assert.Equal(System.Net.HttpStatusCode.ServiceUnavailable, response.StatusCode);
|
||||
Assert.Contains("active-node", body);
|
||||
Assert.Contains(ZbHealthTags.Ready, registrations["database"].Tags);
|
||||
Assert.Contains(ZbHealthTags.Ready, registrations["akka-cluster"].Tags);
|
||||
|
||||
// The leader-only active-node check must NOT be on the readiness tier.
|
||||
Assert.DoesNotContain(ZbHealthTags.Ready, registrations["active-node"].Tags);
|
||||
}
|
||||
finally
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv);
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ActiveTier_Carries_Only_ActiveNode()
|
||||
{
|
||||
// The active-node leader check carries the Active tag (→ /health/active); the readiness
|
||||
// checks do not, so /health/active reports leadership alone.
|
||||
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
|
||||
try
|
||||
{
|
||||
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
|
||||
var factory = CreateCentralFactory();
|
||||
|
||||
var registrations = Registrations(factory).ToDictionary(r => r.Name);
|
||||
|
||||
Assert.True(registrations.ContainsKey("active-node"), "Expected an 'active-node' health check.");
|
||||
Assert.Contains(ZbHealthTags.Active, registrations["active-node"].Tags);
|
||||
|
||||
Assert.DoesNotContain(ZbHealthTags.Active, registrations["database"].Tags);
|
||||
Assert.DoesNotContain(ZbHealthTags.Active, registrations["akka-cluster"].Tags);
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user