Compare commits
115 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1c579410cd | |||
| b0a62a9f3b | |||
| 1f76eac97a | |||
| b45e0be427 | |||
| e4a3f07c99 | |||
| b88ae5db10 | |||
| ec9599e234 | |||
| 8ce57e47a3 | |||
| 1b7f995aea | |||
| 4fca4e1aca | |||
| 7b2f64fdb8 | |||
| 05471dc36c | |||
| 7bba86b2af | |||
| 5f48f81d5a | |||
| 24796f2c12 | |||
| 7bec2fd4db | |||
| ab8900eee5 | |||
| 9a67ebc8a8 | |||
| ad7f9e731f | |||
| a5d857d5b2 | |||
| a79ed5fff1 | |||
| aaf869145a | |||
| 08cddfe128 | |||
| df0dc516c3 | |||
| febe462750 | |||
| c18943f6e1 | |||
| 0b8cad1870 | |||
| b1b3f3ff23 | |||
| c1ce5833e9 | |||
| 83b8d75112 | |||
| 3e9793eff7 | |||
| 3e4450e0b1 | |||
| 3be4e97b89 | |||
| 1d7028c2f9 | |||
| fae960c157 | |||
| c3ae458a95 | |||
| fcf0963f1c | |||
| b599d81f86 | |||
| 826b65c6a1 | |||
| 258468f94b | |||
| e17292dc7f | |||
| 523d79cab0 | |||
| 10057cfa40 | |||
| 7869494393 | |||
| 6667b233b6 | |||
| f9c7d6a577 | |||
| 0ec9ec29ef | |||
| 72de494b9f | |||
| de6a8974c1 | |||
| 84e225e0ad | |||
| b9bdfee189 | |||
| 1b6dedc142 | |||
| da074adce9 | |||
| 60d2fdf25c | |||
| a71d33be35 | |||
| f21883d607 | |||
| 974d835d08 | |||
| 534982948a | |||
| 200fd6b4c4 | |||
| 897b06016c | |||
| 2eb3ceb961 | |||
| d686e12123 | |||
| 497d8be1d5 | |||
| 33d40901d2 | |||
| 26833073ca | |||
| c843abf8b1 | |||
| 6fa29c6c9a | |||
| d731ed98fa | |||
| 33912694fd | |||
| 0a679f2c2a | |||
| f5552c23d4 | |||
| 318e432d93 | |||
| a32ba1f5c5 | |||
| 9071a3aae0 | |||
| 2c1dc8bb14 | |||
| 2c938ea6f7 | |||
| 4cef8124fe | |||
| 4893f7288d | |||
| 47acdde78d | |||
| c6d9b20d9f | |||
| 11de14d12e | |||
| aadbf49678 | |||
| 70d764b063 | |||
| 11bcff6af5 | |||
| de41963587 | |||
| a78b212c95 | |||
| 075c0e69da | |||
| b7f5e887ee | |||
| 933dd1a874 | |||
| c1619d95f5 | |||
| 8ba289f975 | |||
| d0777eee29 | |||
| 83856b7c27 | |||
| c4f315ec90 | |||
| 257caa7bd1 | |||
| 6534875476 | |||
| d2d7730830 | |||
| 2844180865 | |||
| d3ab2bfbaf | |||
| 88e773af36 | |||
| f35ebd7aaf | |||
| 0cbb82e466 | |||
| 7b6884031d | |||
| 7ff7a60ae0 | |||
| 8faa2bf23d | |||
| 2099713ed8 | |||
| c05ffc7b39 | |||
| 60017177cb | |||
| 26bae36f8b | |||
| 368390ea9d | |||
| 8f950722c6 | |||
| 1d729fb0f8 | |||
| 0b99aceacb | |||
| d57b42bcd6 | |||
| 5e87f7e16f |
@@ -42,3 +42,12 @@ config_cache*.db
|
||||
|
||||
# Client CLI/UI runtime scratch (last-connected endpoint cache)
|
||||
session.dat
|
||||
|
||||
# Secrets / local credentials — never commit
|
||||
sql_login.txt
|
||||
|
||||
# OPC UA certificate store (runtime PKI: own/trusted/issued/rejected certs + keys)
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/pki/
|
||||
|
||||
# Documentation audit scratch dir (untracked worktree)
|
||||
.docs-audit/
|
||||
|
||||
@@ -119,7 +119,7 @@ See `docs/v2/dev-environment.md` for the full inventory and rationale.
|
||||
|
||||
## Transport Security
|
||||
|
||||
The server supports configurable OPC UA transport security via the `Security` section in `appsettings.json`. Phase 1 profiles: `None` (default), `Basic256Sha256-Sign`, `Basic256Sha256-SignAndEncrypt`. Security profiles are resolved by `SecurityProfileResolver` at startup. The server certificate is always created even for `None`-only deployments because `UserName` token encryption depends on it. See `docs/security.md` for the full guide.
|
||||
The server supports configurable OPC UA transport security via the `OpcUa:EnabledSecurityProfiles` list in `appsettings.json`. Phase 1 profiles (the `OpcUaSecurityProfile` enum members): `None` (default), `Basic256Sha256Sign`, `Basic256Sha256SignAndEncrypt`. Security policies are built from the enabled profiles by `BuildSecurityPolicies` at startup. The server certificate is always created even for `None`-only deployments because `UserName` token encryption depends on it. See `docs/security.md` for the full guide.
|
||||
|
||||
## Redundancy
|
||||
|
||||
@@ -127,13 +127,15 @@ The server supports non-transparent warm/hot redundancy via the `Redundancy` sec
|
||||
|
||||
## LDAP Authentication
|
||||
|
||||
The server uses LDAP-based user authentication via the `Authentication.Ldap` section in `appsettings.json`. When enabled, credentials are validated by LDAP bind against a GLAuth server (installed at `C:\publish\glauth\`), and LDAP group membership maps to OPC UA permissions: `ReadOnly` (browse/read), `WriteOperate` (write FreeAccess/Operate attributes), `WriteTune` (write Tune attributes), `WriteConfigure` (write Configure attributes), `AlarmAck` (alarm acknowledgment). `LdapUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/LdapUserAuthenticator.cs`) implements `IUserAuthenticator`. See `docs/Security.md` for the full guide and `C:\publish\glauth\auth.md` for LDAP user/group reference.
|
||||
The server uses LDAP-based user authentication via the `Security:Ldap` section in `appsettings.json`. When enabled, credentials are validated by LDAP bind against a GLAuth server, and LDAP group membership maps to OPC UA permissions: `ReadOnly` (browse/read), `WriteOperate` (write FreeAccess/Operate attributes), `WriteTune` (write Tune attributes), `WriteConfigure` (write Configure attributes), `AlarmAck` (alarm acknowledgment). `LdapOpcUaUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/LdapOpcUaUserAuthenticator.cs`) implements `IOpcUaUserAuthenticator`, delegating the LDAP bind + group lookup to `OtOpcUaLdapAuthService` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/OtOpcUaLdapAuthService.cs`, an `ILdapAuthService`). See `docs/security.md` for the full guide.
|
||||
|
||||
Dev/test LDAP is the **shared GLAuth** running on the Linux Docker host at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`, plaintext/`Transport=None`). It is managed via `scadaproj/infra/glauth/` (source of truth + deploy runbook). The single bind account is `cn=serviceaccount,dc=zb,dc=local` / `serviceaccount123`; every test user's password is `password`. The docker-dev compose binds this shared instance directly — `DevStubMode` is no longer used. The per-VM NSSM GLAuth at `C:\publish\glauth\` and the old base DNs `dc=lmxopcua,dc=local` / `dc=otopcua,dc=local` are obsolete. (The integration-test harness under `tests/.../Host.IntegrationTests/` uses a separate ephemeral bitnami/openldap on port 3894 for automated tests — that is distinct from the shared dev GLAuth.)
|
||||
|
||||
## Library Preferences
|
||||
|
||||
- **Logging**: Serilog with rolling daily file sink
|
||||
- **Unit tests**: xUnit + Shouldly for assertions
|
||||
- **Service hosting (Server, Admin)**: .NET generic host with `AddWindowsService` (decision #30 — replaced TopShelf in v2; see `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs`)
|
||||
- **Service hosting (Server, Admin)**: .NET generic host with `AddWindowsService` (decision #30 — replaced TopShelf in v2; see `src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/OtOpcUaServerHostedService.cs`)
|
||||
- **OPC UA**: OPC Foundation UA .NET Standard stack (https://github.com/opcfoundation/ua-.netstandard) — NuGet: `OPCFoundation.NetStandard.Opc.Ua.Server`
|
||||
|
||||
## OPC UA .NET Standard Documentation
|
||||
|
||||
@@ -74,16 +74,16 @@
|
||||
<PackageVersion Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Client" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" Version="1.15.3-beta.1" />
|
||||
<PackageVersion Include="OpenTelemetry.Extensions.Hosting" Version="1.15.3" />
|
||||
<PackageVersion Include="Polly.Core" Version="8.6.6" />
|
||||
<PackageVersion Include="S7netplus" Version="0.20.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.0" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.1" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Formatting.Compact" Version="3.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.Console" Version="6.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.File" Version="7.0.0" />
|
||||
<PackageVersion Include="Shouldly" Version="4.3.0" />
|
||||
@@ -96,7 +96,18 @@
|
||||
<PackageVersion Include="xunit" Version="2.9.2" />
|
||||
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.2" />
|
||||
<PackageVersion Include="xunit.v3" Version="1.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.Akka" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.EntityFrameworkCore" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Telemetry" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Telemetry.Serilog" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Contracts" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Configuration" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Auth.Abstractions" Version="0.1.1" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Auth.Ldap" Version="0.1.1" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Auth.AspNetCore" Version="0.1.1" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Audit" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Theme" Version="0.3.1" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,7 +1,28 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<packageSources>
|
||||
<clear />
|
||||
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" protocolVersion="3" />
|
||||
<add key="local-mxgw" value="./nuget-packages" />
|
||||
<add key="dohertj2-gitea" value="https://gitea.dohertylan.com/api/packages/dohertj2/nuget/index.json" />
|
||||
</packageSources>
|
||||
<packageSourceMapping>
|
||||
<packageSource key="nuget.org">
|
||||
<package pattern="*" />
|
||||
</packageSource>
|
||||
<packageSource key="local-mxgw">
|
||||
<package pattern="ZB.MOM.WW.MxGateway.*" />
|
||||
</packageSource>
|
||||
<packageSource key="dohertj2-gitea">
|
||||
<package pattern="ZB.MOM.WW.Health" />
|
||||
<package pattern="ZB.MOM.WW.Health.*" />
|
||||
<package pattern="ZB.MOM.WW.Telemetry" />
|
||||
<package pattern="ZB.MOM.WW.Telemetry.*" />
|
||||
<package pattern="ZB.MOM.WW.Configuration" />
|
||||
<package pattern="ZB.MOM.WW.Auth" />
|
||||
<package pattern="ZB.MOM.WW.Auth.*" />
|
||||
<package pattern="ZB.MOM.WW.Audit" />
|
||||
<package pattern="ZB.MOM.WW.Theme" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
</configuration>
|
||||
|
||||
@@ -41,10 +41,10 @@ dotnet build ZB.MOM.WW.OtOpcUa.slnx
|
||||
dotnet test ZB.MOM.WW.OtOpcUa.slnx
|
||||
|
||||
# Run the server in dev (foreground)
|
||||
dotnet run --project src/Server/ZB.MOM.WW.OtOpcUa.Server
|
||||
dotnet run --project src/Server/ZB.MOM.WW.OtOpcUa.Host
|
||||
```
|
||||
|
||||
The server starts on `opc.tcp://localhost:4840` with the `None` security profile. Configure `Security.Profiles` in `src/Server/ZB.MOM.WW.OtOpcUa.Server/appsettings.json` to enable `Basic256Sha256-Sign` or `Basic256Sha256-SignAndEncrypt`. See [docs/security.md](docs/security.md).
|
||||
The server starts on `opc.tcp://localhost:4840` with the `None` security profile. Configure `OpcUa:EnabledSecurityProfiles` in `src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.json` to enable `Basic256Sha256Sign` or `Basic256Sha256SignAndEncrypt`. See [docs/security.md](docs/security.md).
|
||||
|
||||
## Install as Windows Services
|
||||
|
||||
|
||||
+17
-1
@@ -1,6 +1,7 @@
|
||||
# Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml
|
||||
# to spin four host containers (admin-a, admin-b, driver-a, driver-b) from a single image —
|
||||
# to spin six host containers (central-1, central-2, site-a-1, site-a-2, site-b-1, site-b-2) from a single image —
|
||||
# Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them.
|
||||
# A separate `migrator` stage (below) applies EF migrations once on bring-up.
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
WORKDIR /src
|
||||
@@ -18,3 +19,18 @@ EXPOSE 4053
|
||||
EXPOSE 4840
|
||||
|
||||
ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"]
|
||||
|
||||
# ── Migrator (one-shot) ──────────────────────────────────────────────────────
|
||||
# Applies EF Core migrations to the ConfigDb so a fresh SQL volume gets the schema
|
||||
# with no operator step. docker-dev compose runs this once, before cluster-seed +
|
||||
# the host nodes (they depend on it via service_completed_successfully). The host
|
||||
# nodes deliberately do NOT auto-migrate (production owns schema changes), so this
|
||||
# rig-only stage carries that responsibility. The connection comes from the
|
||||
# OTOPCUA_CONFIG_CONNECTION env var read by DesignTimeDbContextFactory.
|
||||
FROM build AS migrator
|
||||
RUN dotnet tool install --global dotnet-ef --version 10.0.7
|
||||
ENV PATH="${PATH}:/root/.dotnet/tools"
|
||||
WORKDIR /src
|
||||
ENTRYPOINT ["dotnet", "ef", "database", "update", \
|
||||
"--project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration", \
|
||||
"--startup-project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration"]
|
||||
|
||||
+42
-39
@@ -1,6 +1,6 @@
|
||||
# docker-dev
|
||||
|
||||
Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **three isolated Akka clusters** + SQL Server + OpenLDAP + Traefik on the same Compose network. All three clusters share the single `OtOpcUa` ConfigDb — multi-tenancy is enforced by per-row `ServerCluster.ClusterId` scoping. Akka.Cluster gossip stays isolated between meshes because their seed-node lists are disjoint, even though they share the same system name `otopcua`.
|
||||
Mac-friendly OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **one single Akka mesh** (hub-and-spoke topology) + SQL Server + Traefik on the same Compose network. All six host nodes share the single `OtOpcUa` ConfigDb — logical separation between MAIN, SITE-A, and SITE-B is enforced by per-row `ServerCluster.ClusterId` scoping, not by mesh isolation.
|
||||
|
||||
## Stack
|
||||
|
||||
@@ -8,50 +8,48 @@ Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration sm
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all three clusters | host `14330` → container `1433` |
|
||||
| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8089` |
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all nodes | host `14330` → container `1433` |
|
||||
| `traefik` | Routes Admin UI traffic by PathPrefix to central admin nodes | host `9200` (Admin UI), dashboard `8089` |
|
||||
|
||||
Authentication runs in `DevStubMode` — every host container has `Authentication__Ldap__DevStubMode=true` set, so the LDAP service is not part of the dev compose right now (the `bitnami/openldap:2.6` image was retired and the legacy tag crashes mid-setup with exit 68). Any non-empty username/password signs in as `FleetAdmin`. To restore a real LDAP service, drop the env var and add an `openldap`-compatible image back to compose.
|
||||
Authentication uses the **shared GLAuth** on the Linux Docker host at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). Only the central admin nodes authenticate users. Sign in as `multi-role` / `password` to get all three OtOpcUa roles (Administrator, Designer, Viewer), or use any other shared test user with password `password`. Group→role mappings are seeded by `seed/seed-clusters.sql` (`OtOpcUa-Admins`→Administrator, `OtOpcUa-Designers`→Designer, `OtOpcUa-Viewers`→Viewer). The shared GLAuth source of truth and deploy runbook live in `scadaproj/infra/glauth/`.
|
||||
|
||||
### Main cluster — split admin/driver roles
|
||||
### Central nodes — fused admin+driver (MAIN cluster, UI + deploy singleton)
|
||||
|
||||
| Service | Role | Ports |
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `admin-a` | `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` |
|
||||
| `admin-b` | `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` |
|
||||
| `driver-a` | `OTOPCUA_ROLES=driver` | host `4840` → container `4840` |
|
||||
| `driver-b` | `OTOPCUA_ROLES=driver` | host `4841` → container `4840` |
|
||||
| `central-1` | `OTOPCUA_ROLES=admin,driver`, Akka mesh seed | host `4840` → container `4840`; internal `9000` |
|
||||
| `central-2` | `OTOPCUA_ROLES=admin,driver`, joins central-1 | host `4841` → container `4840`; internal `9000` |
|
||||
|
||||
### Site A cluster — 2-node fused admin+driver
|
||||
`central-1` and `central-2` are the **only** nodes that host the Admin UI and the deploy singleton. They are also the OPC UA publishers for the MAIN cluster. Traefik routes all `PathPrefix(/)` traffic to whichever central node has the leader role.
|
||||
|
||||
| Service | Role | Ports |
|
||||
### Site A nodes — driver-only (SITE-A cluster)
|
||||
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `site-a-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=admin,driver`, joins site-a-1 | host `4843` → container `4840` |
|
||||
| `site-a-1` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4843` → container `4840` |
|
||||
|
||||
### Site B cluster — 2-node fused admin+driver
|
||||
### Site B nodes — driver-only (SITE-B cluster)
|
||||
|
||||
| Service | Role | Ports |
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `site-b-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=admin,driver`, joins site-b-1 | host `4845` → container `4840` |
|
||||
| `site-b-1` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4845` → container `4840` |
|
||||
|
||||
All containers bind Akka remoting to port `4053` inside their own network namespace; the `PublicHostname` of each matches its Compose service name. Akka mesh isolation is enforced purely by disjoint seed lists. Configuration-side isolation is enforced by `ServerCluster.ClusterId` — see "Multi-tenancy" below.
|
||||
Site nodes serve no UI and authenticate no users. The central cluster manages and deploys to them over the shared Akka mesh. All six nodes bind Akka remoting to port `4053` inside their own network namespace; `PublicHostname` for each matches its Compose service name.
|
||||
|
||||
## Multi-tenancy
|
||||
|
||||
All eight host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three Akka meshes: each Akka cluster maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
All six host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three logical clusters: each maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
|
||||
A one-shot `cluster-seed` Compose service (image `mcr.microsoft.com/mssql-tools`) waits for SQL + the EF auto-migration to complete and then INSERTs the rows below. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
Two one-shot Compose services bootstrap the DB on bring-up: `migrator` applies the EF Core migrations (so a fresh SQL volume gets the schema with no operator step — the host nodes deliberately do **not** auto-migrate, since production owns schema changes), then `cluster-seed` (image `mcr.microsoft.com/mssql-tools`) INSERTs the rows below. `cluster-seed` and every host node wait on the `migrator` via `depends_on` (`condition: service_completed_successfully`), so the seed never races an in-progress migration. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
|
||||
| Akka mesh | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
| Logical cluster | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
|---|---|---|
|
||||
| Main | `MAIN` | `driver-a`, `driver-b` (OPC UA publishers) |
|
||||
| Main | `MAIN` | `central-1`, `central-2` (OPC UA publishers + admin UI) |
|
||||
| Site A | `SITE-A` | `site-a-1`, `site-a-2` |
|
||||
| Site B | `SITE-B` | `site-b-1`, `site-b-2` |
|
||||
|
||||
`ClusterNode` is the table for **OPC UA-publishing nodes** (not every Akka cluster member), which is why the main cluster's `admin-a` / `admin-b` don't get rows — they're control-plane-only.
|
||||
|
||||
Each `ClusterNode.NodeId` matches the node's `Cluster__PublicHostname` env value (Compose service name) — that's the lookup the runtime uses to resolve its own membership. `ApplicationUri` follows the `urn:OtOpcUa:<NodeId>` convention.
|
||||
|
||||
The SQL lives at `seed/seed-clusters.sql`; the wait-and-apply wrapper lives at `seed/entrypoint.sh`. To re-seed manually:
|
||||
@@ -72,21 +70,25 @@ The DriverHost actor doesn't spawn drivers from raw DriverInstance rows on its o
|
||||
# from the repo root
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
# wait ~20 seconds for SQL to come up + all three clusters to form
|
||||
# the one-shot migrator + cluster-seed bootstrap the DB; watch the seed finish:
|
||||
docker compose -f docker-dev/docker-compose.yml logs -f cluster-seed # ^C once it prints "[cluster-seed] done."
|
||||
|
||||
open http://localhost # main cluster admin UI
|
||||
open http://site-a.localhost # site A admin UI
|
||||
open http://site-b.localhost # site B admin UI
|
||||
open http://localhost:9200 # Admin UI (Traefik → central-1 or central-2)
|
||||
open http://localhost:8089 # Traefik dashboard
|
||||
```
|
||||
|
||||
On macOS, `*.localhost` resolves to `127.0.0.1` automatically. On Linux add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't.
|
||||
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache.
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). **No manual schema step is needed** — on a fresh SQL volume the one-shot `migrator` service applies the EF migrations (the host nodes deliberately don't auto-migrate, since production owns schema changes), then `cluster-seed` populates the cluster/namespace/driver rows. `cluster-seed` and the host nodes wait for the migrator via `service_completed_successfully`, so nothing races an in-progress migration. A plain `docker compose ... up -d` on an existing volume is a fast no-op for both — the named SQL volume keeps the schema + rows across restarts; only `down -v` wipes them, after which the next `up` re-migrates + re-seeds automatically.
|
||||
|
||||
## Auth (dev only)
|
||||
|
||||
`Authentication__Ldap__DevStubMode=true` is set on every host container, so any non-empty username/password signs in as a `FleetAdmin` user without contacting an LDAP server. **Do not** ship this configuration to production — set `DevStubMode=false` and wire a real LDAP backend before any non-dev deployment.
|
||||
Central nodes authenticate against the shared GLAuth at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). `DevStubMode` is **not** active. Sign in with any test user (password `password`); `multi-role` / `password` returns all three roles (Administrator, Designer, Viewer). Group→role mappings are seeded by `seed/seed-clusters.sql`. The GLAuth source of truth + deploy runbook is in `scadaproj/infra/glauth/`. **Do not** enable `DevStubMode` outside local debugging — production must always bind a real LDAP backend.
|
||||
|
||||
## Headless deploy
|
||||
|
||||
```http
|
||||
POST http://localhost:9200/api/deployments
|
||||
X-Api-Key: docker-dev-deploy-key
|
||||
```
|
||||
|
||||
## Tear down
|
||||
|
||||
@@ -94,19 +96,20 @@ The first build takes a few minutes (.NET SDK image + restore + publish). Subseq
|
||||
docker compose -f docker-dev/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across restarts.
|
||||
The `-v` drops the SQL volume; remove it to keep ConfigDb state across restarts. There is no local LDAP volume — LDAP is the shared external GLAuth on `10.100.0.35:3893`.
|
||||
|
||||
## Failover smoke
|
||||
|
||||
1. Watch the Traefik dashboard at `http://localhost:8089`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it.
|
||||
1. Watch the Traefik dashboard at `http://localhost:8089`. Both `central-1` and `central-2` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop central-1` — `central-2` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `central-2` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start central-1` — `central-1` rejoins as a follower; `central-2` keeps the leader role until something disturbs it.
|
||||
|
||||
## Notes
|
||||
|
||||
- This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings.
|
||||
- The OPC UA driver endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (driver-a), `opc.tcp://localhost:4841` (driver-b)
|
||||
- The OPC UA endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (central-1), `opc.tcp://localhost:4841` (central-2)
|
||||
- Site A: `opc.tcp://localhost:4842` (site-a-1), `opc.tcp://localhost:4843` (site-a-2)
|
||||
- Site B: `opc.tcp://localhost:4844` (site-b-1), `opc.tcp://localhost:4845` (site-b-2)
|
||||
- Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success.
|
||||
- SQL persistence: ConfigDb state survives container restarts (named Docker volume). Drop the volume with `down -v` for a clean slate.
|
||||
|
||||
+173
-148
@@ -1,40 +1,46 @@
|
||||
# docker-dev/ — Mac-friendly multi-cluster fleet for v2 development + manual UI exercise.
|
||||
# docker-dev/ — Mac-friendly single-mesh hub-and-spoke fleet for v2 development + manual UI exercise.
|
||||
#
|
||||
# Stack (3 separate Akka clusters — all share the single `OtOpcUa` ConfigDb):
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb that all three clusters use
|
||||
# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in
|
||||
# Topology: ONE Akka mesh seeded by `central-1`. Logical separation between
|
||||
# tenants is by ServerCluster.ClusterId rows (MAIN / SITE-A / SITE-B) in the one
|
||||
# shared `OtOpcUa` ConfigDb — NOT by separate meshes. All six host nodes join the
|
||||
# same gossip ring and the central UI deploys to every cluster over it.
|
||||
#
|
||||
# Main cluster (existing — split-role admin / driver pair on a single Akka mesh):
|
||||
# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (seed)
|
||||
# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a)
|
||||
# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# Stack:
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb every node uses
|
||||
# cluster-seed one-shot mssql-tools job that INSERTs the ServerCluster +
|
||||
# ClusterNode rows scoping each tenant, then exits (idempotent)
|
||||
#
|
||||
# Site A cluster (2-node fused admin+driver):
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=admin,driver, seed = site-a-1
|
||||
# central-1, central-2 OTOPCUA_ROLES=admin,driver — the ONLY UI + deploy
|
||||
# singleton, plus the MAIN cluster's OPC UA publishers.
|
||||
# Reachable at http://localhost:9200 (via Traefik).
|
||||
# central-1 is the Akka seed node; central-2 joins it.
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=driver — driver-only members of the same
|
||||
# site-b-1, site-b-2 mesh, scoped to SITE-A / SITE-B by ClusterId. They
|
||||
# serve no UI and authenticate no users; the central
|
||||
# cluster manages and deploys to them over the mesh.
|
||||
#
|
||||
# Site B cluster (2-node fused admin+driver):
|
||||
# site-b-1, site-b-2 OTOPCUA_ROLES=admin,driver, seed = site-b-1
|
||||
# Auth is real LDAP against the shared GLAuth on the Linux Docker host
|
||||
# (10.100.0.35:3893, dc=zb,dc=local) — there is no LDAP container here.
|
||||
# Only the admin-role central nodes carry the Security__Ldap__* block.
|
||||
# Sign in `multi-role` / `password`.
|
||||
#
|
||||
# traefik PathPrefix → main cluster admin-a/admin-b; Host(`site-a.localhost`) →
|
||||
# site-a-*; Host(`site-b.localhost`) → site-b-*. Add the two site hosts to
|
||||
# your /etc/hosts (or rely on macOS `.localhost` auto-resolution).
|
||||
# traefik PathPrefix(`/`) → central-1 / central-2 (the single UI route).
|
||||
#
|
||||
# Multi-tenancy: ConfigDb is one schema with a `ServerCluster` table; each Akka cluster
|
||||
# corresponds to a row in it (ClusterId = "MAIN" / "SITE-A" / "SITE-B"), and each node's
|
||||
# `ClusterNode.NodeId` points back at the row that owns it. After first boot, sign in to
|
||||
# any cluster's Admin UI and create the matching ServerCluster + ClusterNode rows via
|
||||
# /clusters and /hosts so the runtime knows what configuration scope applies.
|
||||
# OPC UA endpoints (host-side port → container 4840):
|
||||
# central-1 :4840 central-2 :4841
|
||||
# site-a-1 :4842 site-a-2 :4843
|
||||
# site-b-1 :4844 site-b-2 :4845
|
||||
#
|
||||
# Akka mesh isolation: same system name "otopcua" + same remoting port 4053 inside each
|
||||
# container's own network namespace, but with disjoint seed-node lists — gossip never
|
||||
# crosses between the three meshes.
|
||||
# Headless deploy: POST http://localhost:9200/api/deployments with the
|
||||
# X-Api-Key header (Security__DeployApiKey = "docker-dev-deploy-key").
|
||||
#
|
||||
# SQL persistence: the otopcua-mssql-data named volume keeps the ConfigDb schema
|
||||
# + seeded clusters across `docker compose up` cycles; without it a recreate
|
||||
# silently drops the OtOpcUa database.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
# open http://localhost # main cluster Blazor admin UI
|
||||
# open http://site-a.localhost # site A admin UI
|
||||
# open http://site-b.localhost # site B admin UI
|
||||
# open http://localhost:9200 # central Blazor admin UI
|
||||
# open http://localhost:8089 # Traefik dashboard (8080 is the sister scadalink stack)
|
||||
#
|
||||
# Tear-down: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
@@ -51,123 +57,154 @@ services:
|
||||
MSSQL_PID: Developer
|
||||
ports:
|
||||
- "14330:1433"
|
||||
# Persist the ConfigDb across container recreates. Without this the dev SQL
|
||||
# is ephemeral (container writable layer), so a recreate silently drops the
|
||||
# OtOpcUa database and every host node fails its configdb health check until
|
||||
# EF auto-migration + cluster-seed rebuild it. The named volume keeps the
|
||||
# schema + seeded clusters between `docker compose up` cycles.
|
||||
volumes:
|
||||
- otopcua-mssql-data:/var/opt/mssql
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P 'OtOpcUa!Dev123' -No -Q 'SELECT 1' || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Waits for SQL + the host containers' EF auto-migration, then INSERTs the
|
||||
# three ServerCluster rows and the six ClusterNode rows that scope each Akka
|
||||
# mesh inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
# ── Migrator (one-shot) ────────────────────────────────────────────────────
|
||||
# Applies EF Core migrations to the OtOpcUa ConfigDb so a fresh SQL volume gets
|
||||
# the schema with no operator step (the host nodes deliberately don't auto-
|
||||
# migrate — production owns schema changes). cluster-seed + every host node
|
||||
# depend on this completing, so nothing races an in-progress migration.
|
||||
# Idempotent: a no-op once the schema is current.
|
||||
migrator:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
target: migrator
|
||||
image: otopcua-migrator:dev
|
||||
depends_on:
|
||||
sql:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OTOPCUA_CONFIG_CONNECTION: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
restart: "no"
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Runs only after `migrator` completes (so the schema is final — no race), then
|
||||
# INSERTs the three ServerCluster rows and the six ClusterNode rows that scope
|
||||
# each tenant inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
depends_on:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./seed:/seed:ro
|
||||
entrypoint: ["/bin/bash", "/seed/entrypoint.sh"]
|
||||
restart: "no"
|
||||
|
||||
# OpenLDAP was previously here but the bitnami/openldap:2.6 image was retired
|
||||
# (manifest gone) and bitnamilegacy/openldap:2.6 crashes during LDIF setup with
|
||||
# exit 68. For the dev compose every host container now runs with
|
||||
# Authentication__Ldap__DevStubMode=true, so any non-empty username/password
|
||||
# signs in as `FleetAdmin`. Restore a real LDAP service when there's a need
|
||||
# for end-to-end LDAP coverage (the host code path is unchanged).
|
||||
# A local OpenLDAP container used to live here, but the bitnami/openldap:2.6
|
||||
# image was retired (manifest gone) and bitnamilegacy/openldap:2.6 crashes
|
||||
# during LDIF setup (exit 68). Rather than stub auth, the central (admin-role)
|
||||
# containers bind the shared GLAuth on the Linux Docker host (Security__Ldap__*
|
||||
# below: 10.100.0.35:3893, dc=zb,dc=local, DevStubMode=false) — so dev auth
|
||||
# exercises the real LDAP bind + group→role path. Sign in as `multi-role` /
|
||||
# `password` (all roles) or any shared test user / `password`.
|
||||
|
||||
admin-a: &otopcua-host
|
||||
# ── Central cluster (2-node fused admin+driver) ─────────────────────────────
|
||||
# The only UI + deploy singleton; also the MAIN cluster's OPC UA publishers.
|
||||
# central-1 seeds the single Akka mesh that every other node joins.
|
||||
|
||||
central-1: &otopcua-host
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
target: runtime
|
||||
image: otopcua-host:dev
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
admin-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
driver-a:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
# Resolved at runtime by GalaxyDriver.ResolveApiKey when a DriverInstance's
|
||||
# Gateway.ApiKeySecretRef = "env:GALAXY_MXGW_API_KEY".
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
driver-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# Shares the OtOpcUa ConfigDb with the main + site-b clusters; multi-tenancy is
|
||||
# enforced by ServerCluster.ClusterId rows (configure via /clusters after boot).
|
||||
# Akka isolation comes from the disjoint seed list (seed = site-a-1).
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__PublicHostname: "central-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
central-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "central-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node driver-only) ─────────────────────────────────────
|
||||
# Driver-only members of the single mesh, scoped to SITE-A by ClusterId. No UI,
|
||||
# no user auth — managed + deployed to by the central cluster over the mesh.
|
||||
# All site nodes seed central-1.
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
# Resolved at runtime by GalaxyDriver.ResolveApiKey when a DriverInstance's
|
||||
# Gateway.ApiKeySecretRef = "env:GALAXY_MXGW_API_KEY".
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4842:4840"
|
||||
@@ -176,43 +213,36 @@ services:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-a-1: { condition: service_started }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4843:4840"
|
||||
|
||||
# ── Site B cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# ── Site B cluster (2-node driver-only) ─────────────────────────────────────
|
||||
|
||||
site-b-1:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4844:4840"
|
||||
@@ -221,21 +251,16 @@ services:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-b-1: { condition: service_started }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4845:4840"
|
||||
@@ -253,9 +278,9 @@ services:
|
||||
volumes:
|
||||
- ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro
|
||||
depends_on:
|
||||
- admin-a
|
||||
- admin-b
|
||||
- site-a-1
|
||||
- site-a-2
|
||||
- site-b-1
|
||||
- site-b-2
|
||||
- central-1
|
||||
- central-2
|
||||
|
||||
volumes:
|
||||
# SQL Server data dir — persists the OtOpcUa ConfigDb across container recreates.
|
||||
otopcua-mssql-data:
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
# docker-dev cluster-seed entrypoint. Waits for the OtOpcUa ConfigDb schema to
|
||||
# be in place, then applies the idempotent row seed.
|
||||
# docker-dev cluster-seed entrypoint. Applies the idempotent row seed.
|
||||
#
|
||||
# IMPORTANT: this container does NOT run EF migrations — sqlcmd can't execute
|
||||
# the V2 migration script cleanly because it contains CREATE PROCEDURE
|
||||
# statements inside IF NOT EXISTS BEGIN ... END blocks (procs must be the
|
||||
# first statement in their batch). Migrations are owned by the operator:
|
||||
#
|
||||
# dotnet ef database update \
|
||||
# --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \
|
||||
# --startup-project src/Server/ZB.MOM.WW.OtOpcUa.Host
|
||||
#
|
||||
# (with ConnectionStrings__ConfigDb pointing at Server=localhost,14330;...).
|
||||
# Once the schema is in place, restart the cluster-seed container — or just
|
||||
# `docker compose up -d` and the seed will pick up where it left off thanks to
|
||||
# the IF NOT EXISTS guards in seed-clusters.sql.
|
||||
# This container does NOT run EF migrations — sqlcmd can't execute the migration
|
||||
# script cleanly (it has CREATE PROCEDURE inside IF NOT EXISTS BEGIN ... END
|
||||
# blocks; procs must be the first statement in their batch). The schema is owned
|
||||
# by the `migrator` Compose service (dotnet ef), which this seed depends on via
|
||||
# `service_completed_successfully` — so by the time we run, migrations are fully
|
||||
# applied. The dbo.ServerCluster wait below is therefore just a fast sanity check.
|
||||
# Re-runs are safe: every insert in seed-clusters.sql is IF NOT EXISTS-guarded.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -37,7 +30,7 @@ until run_sql_in master -Q "SELECT 1" >/dev/null 2>&1; do
|
||||
done
|
||||
echo "[cluster-seed] SQL Server up."
|
||||
|
||||
echo "[cluster-seed] waiting for ${DB} database + dbo.ServerCluster table (operator must run dotnet ef database update)..."
|
||||
echo "[cluster-seed] verifying ${DB} schema (dbo.ServerCluster) is present (migrator should have applied it)..."
|
||||
until run_sql_in "$DB" -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 3
|
||||
done
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
--
|
||||
-- Populates:
|
||||
-- ServerCluster MAIN, SITE-A, SITE-B
|
||||
-- ClusterNode driver-a, driver-b → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
-- ClusterNode central-1, central-2 → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
--
|
||||
-- ServerCluster.NodeCount + RedundancyMode are coupled by CHECK constraint:
|
||||
-- NodeCount=1 ⇒ RedundancyMode='None'
|
||||
@@ -32,7 +32,7 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'MAIN')
|
||||
VALUES
|
||||
('MAIN', 'Main cluster', 'zb', 'docker-dev',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — admin-a/admin-b control-plane, driver-a/driver-b OPC UA publishers.',
|
||||
'docker-dev seed — central-1/central-2 fused admin+driver: UI + deploy singleton + MAIN OPC UA publishers.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
@@ -41,7 +41,7 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
VALUES
|
||||
('SITE-A', 'Site A', 'zb', 'site-a',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev seed — 2-node driver-only, managed by the central cluster over the shared mesh (empty until configured).',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
@@ -50,11 +50,11 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
VALUES
|
||||
('SITE-B', 'Site B', 'zb', 'site-b',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev seed — 2-node driver-only, managed by the central cluster over the shared mesh (empty until configured).',
|
||||
'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — main cluster OPC UA publishers
|
||||
-- ClusterNode — central cluster (MAIN UI + deploy singleton + OPC UA publishers)
|
||||
--
|
||||
-- NodeId is "<compose-service>:4053" so it matches what ClusterRoleInfo +
|
||||
-- ConfigPublishCoordinator derive from Akka.Cluster.Get(system).State.Members
|
||||
@@ -62,15 +62,15 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
-- ClusterNode.NodeId; mismatched values cause FK 547 on deploy.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a:4053')
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'central-1:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-a:4053', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed');
|
||||
VALUES ('central-1:4053', 'MAIN', 'central-1', 4840, 8081, 'urn:OtOpcUa:central-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b:4053')
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'central-2:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-b:4053', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed');
|
||||
VALUES ('central-2:4053', 'MAIN', 'central-2', 4840, 8081, 'urn:OtOpcUa:central-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site A
|
||||
@@ -193,3 +193,25 @@ SELECT NamespaceId, ClusterId, Kind, NamespaceUri FROM dbo.Namespace ORDER BY Cl
|
||||
SELECT DriverInstanceId, ClusterId, DriverType, NamespaceId, Name
|
||||
FROM dbo.DriverInstance ORDER BY ClusterId, DriverInstanceId;
|
||||
SELECT TagId, DriverInstanceId, FolderPath, Name, DataType FROM dbo.Tag ORDER BY DriverInstanceId, FolderPath, Name;
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- LDAP group -> AdminUI role mappings (shared dev GLAuth, 10.100.0.35)
|
||||
-- System-wide (ClusterId NULL, IsSystemWide 1). Group keys are the BARE RDN
|
||||
-- names the shared ZB.MOM.WW.Auth.Ldap returns (LdapAuthService.ToGroupShortName
|
||||
-- = first-RDN value), e.g. memberOf ou=OtOpcUa-Admins,... -> "OtOpcUa-Admins".
|
||||
-- Role is stored as the AdminRole enum NAME (HasConversion<string>).
|
||||
-- QUOTED_IDENTIFIER ON is required because the table has a filtered unique index.
|
||||
------------------------------------------------------------------------------
|
||||
SET QUOTED_IDENTIFIER ON;
|
||||
SET ANSI_NULLS ON;
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.LdapGroupRoleMapping WHERE LdapGroup = 'OtOpcUa-Admins' AND ClusterId IS NULL)
|
||||
INSERT INTO dbo.LdapGroupRoleMapping (Id, LdapGroup, Role, ClusterId, IsSystemWide, CreatedAtUtc, Notes)
|
||||
VALUES (NEWID(), 'OtOpcUa-Admins', 'Administrator', NULL, 1, SYSUTCDATETIME(), N'shared-glauth dev seed');
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.LdapGroupRoleMapping WHERE LdapGroup = 'OtOpcUa-Designers' AND ClusterId IS NULL)
|
||||
INSERT INTO dbo.LdapGroupRoleMapping (Id, LdapGroup, Role, ClusterId, IsSystemWide, CreatedAtUtc, Notes)
|
||||
VALUES (NEWID(), 'OtOpcUa-Designers', 'Designer', NULL, 1, SYSUTCDATETIME(), N'shared-glauth dev seed');
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.LdapGroupRoleMapping WHERE LdapGroup = 'OtOpcUa-Viewers' AND ClusterId IS NULL)
|
||||
INSERT INTO dbo.LdapGroupRoleMapping (Id, LdapGroup, Role, ClusterId, IsSystemWide, CreatedAtUtc, Notes)
|
||||
VALUES (NEWID(), 'OtOpcUa-Viewers', 'Viewer', NULL, 1, SYSUTCDATETIME(), N'shared-glauth dev seed');
|
||||
|
||||
SELECT LdapGroup, Role, IsSystemWide FROM dbo.LdapGroupRoleMapping ORDER BY LdapGroup;
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes three
|
||||
# Akka clusters that share the Compose network:
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes the single
|
||||
# central cluster UI on the shared Compose network:
|
||||
#
|
||||
# - Main cluster (default): PathPrefix(`/`) → admin-a / admin-b.
|
||||
# - Site A cluster: Host(`site-a.localhost`) → site-a-1 / site-a-2.
|
||||
# - Site B cluster: Host(`site-b.localhost`) → site-b-1 / site-b-2.
|
||||
# - Central UI (only route): PathPrefix(`/`) → central-1 / central-2.
|
||||
#
|
||||
# Host-header rules are more specific than PathPrefix, so they win over the
|
||||
# default router for the site hostnames automatically — no priority field needed.
|
||||
# The driver-only site nodes serve no UI, so they have no Traefik route — the
|
||||
# central cluster manages and deploys to them over the shared Akka mesh.
|
||||
|
||||
http:
|
||||
routers:
|
||||
@@ -15,16 +13,6 @@ http:
|
||||
rule: "PathPrefix(`/`)"
|
||||
service: otopcua-admin
|
||||
|
||||
otopcua-site-a:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-a.localhost`)"
|
||||
service: otopcua-site-a
|
||||
|
||||
otopcua-site-b:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-b.localhost`)"
|
||||
service: otopcua-site-b
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
@@ -37,44 +25,8 @@ http:
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-a:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-a-1:9000"
|
||||
- url: "http://site-a-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-b:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-b-1:9000"
|
||||
- url: "http://site-b-2:9000"
|
||||
- url: "http://central-1:9000"
|
||||
- url: "http://central-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
|
||||
+26
-24
@@ -1,10 +1,10 @@
|
||||
# Address Space
|
||||
|
||||
Each driver's browsable subtree is built by streaming nodes from the driver's `ITagDiscovery.DiscoverAsync` implementation into an `IAddressSpaceBuilder`. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) owns the shared orchestration; in v2 the SDK-driven materialization is handled by `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`) fed via `SdkAddressSpaceSink` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs`). The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
Address-space construction is a two-layer system. The **driver-facing layer** is the streaming builder: a driver implements `ITagDiscovery.DiscoverAsync` (`src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs`) and emits `Folder` / `Variable` / `AddProperty` calls into an `IAddressSpaceBuilder` as it walks its backend — no buffering of the whole tree. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) wraps that builder to capture alarm-condition sinks and routes alarm events from the driver to them. The **SDK materialization layer** turns the resulting node descriptions into live OPC UA nodes: `OpcUaPublishActor` drives the write-only `IOpcUaAddressSpaceSink`, whose production binding `SdkAddressSpaceSink` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs`) forwards to `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`), a `CustomNodeManager2` subclass that owns the `FolderState` / `BaseDataVariableState` instances. The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
|
||||
## Driver root folder
|
||||
## Root folder
|
||||
|
||||
Every driver's subtree starts with a root `FolderState` under the standard OPC UA `Objects` folder, wired with an `Organizes` reference. `DriverNodeManager.CreateAddressSpace` creates this folder with `NodeId = ns;s={DriverInstanceId}`, `BrowseName = {DriverInstanceId}`, and `EventNotifier = SubscribeToEvents | HistoryRead` so alarm and history-event subscriptions can target the root. The namespace URI is `urn:OtOpcUa:{DriverInstanceId}`.
|
||||
`OtOpcUaNodeManager.CreateAddressSpace` creates a single shared root `FolderState` (`NodeId = OtOpcUa`, `BrowseName = OtOpcUa`, `EventNotifier = None`) under the standard OPC UA `Objects` folder, wired with an `Organizes` reference. Every driver's folders and variables hang beneath this one root; the server is published under a single `ApplicationUri = urn:OtOpcUa` (the `OpcUaApplicationHostOptions.ApplicationUri` default) and all nodes live in the server's single custom namespace, not a per-driver `urn:OtOpcUa:{DriverInstanceId}`. The UNS Area → Line → Equipment folder skeleton under the root is materialised by `Phase7Applier.MaterialiseHierarchy` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs`); SystemPlatform (Galaxy) tags are materialised by `Phase7Applier.MaterialiseGalaxyTags`.
|
||||
|
||||
## IAddressSpaceBuilder surface
|
||||
|
||||
@@ -14,24 +14,24 @@ Every driver's subtree starts with a root `FolderState` under the standard OPC U
|
||||
- `Variable(browseName, displayName, DriverAttributeInfo attributeInfo)` — creates a `BaseDataVariableState` and returns an `IVariableHandle` the driver keeps for alarm wiring.
|
||||
- `AddProperty(browseName, DriverDataType, value)` — attaches a `PropertyState` for static metadata (e.g. equipment identification fields).
|
||||
|
||||
Drivers drive ordering. Typical pattern: root → folder per equipment → variables per tag. `GenericDriverNodeManager` calls `DiscoverAsync` once on startup and once per rediscovery cycle.
|
||||
Drivers drive ordering. Typical pattern: root → folder per equipment → variables per tag. `GenericDriverNodeManager.BuildAddressSpaceAsync` calls `DiscoverAsync` once on startup and once per rediscovery cycle, tearing down the previous alarm subscription and clearing its sink registry before each re-walk so a redeploy doesn't double-fire alarm events.
|
||||
|
||||
## DriverAttributeInfo → OPC UA variable
|
||||
|
||||
Each variable carries a `DriverAttributeInfo` (`src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverAttributeInfo.cs`):
|
||||
|
||||
| Field | OPC UA target |
|
||||
| Field | Role |
|
||||
|---|---|
|
||||
| `FullName` | `NodeId.Identifier` — used as the driver-side lookup key for Read/Write/Subscribe |
|
||||
| `DriverDataType` | mapped to a built-in `DataTypeIds.*` NodeId via `DriverNodeManager.MapDataType` |
|
||||
| `IsArray` | `ValueRank = OneDimension` when true, `Scalar` otherwise |
|
||||
| `ArrayDim` | declared array length, carried through as metadata |
|
||||
| `SecurityClass` | stored in `_securityByFullRef` for `WriteAuthzPolicy` gating on write |
|
||||
| `IsHistorized` | flips `AccessLevel.HistoryRead` + `Historizing = true` |
|
||||
| `FullName` | driver-side full reference used as the lookup key for Read/Write/Subscribe; also seeds the variable's string `NodeId` |
|
||||
| `DriverDataType` | resolved to a built-in `DataTypeIds.*` NodeId at materialization time — `OtOpcUaNodeManager.ResolveBuiltInDataType` maps the data-type name string; each driver first maps its native type into `DriverDataType` (e.g. Galaxy via `Browse/DataTypeMap.Map`) |
|
||||
| `IsArray` / `ArrayDim` | array flag and declared 1-D array length, both carried as metadata; the Galaxy discoverer sets `ArrayDim` only when the gateway reports a positive dimension |
|
||||
| `SecurityClass` | write-authorization tier (`SecurityClassification`); enforced server-side by the `NodePermissions` ACL evaluator (`TriePermissionEvaluator`) mapping each `OpcUaOperation` to a required permission bit. The Galaxy driver also caches it per full reference (`_securityByFullRef`) to answer `GetSecurityClassification` |
|
||||
| `IsHistorized` | marks the attribute as feeding historian / HistoryRead |
|
||||
| `IsAlarm` | drives the `MarkAsAlarmCondition` pass (see below) |
|
||||
| `WriteIdempotent` | stored in `_writeIdempotentByFullRef`; fed to `CapabilityInvoker.ExecuteWriteAsync` |
|
||||
| `WriteIdempotent` | when true the attribute's writes are safe to replay, so the capability invoker may apply Polly retry; defaults false so pulses / acks / counters aren't auto-retried |
|
||||
| `Source` | `NodeSourceKind` discriminator (`Driver` / `Virtual` / `ScriptedAlarm`) that decides which subsystem dispatches the node's Read/Write/Subscribe |
|
||||
|
||||
The initial value stays `null` with `StatusCode = BadWaitingForInitialData` until the first Read or `ISubscribable.OnDataChange` push lands.
|
||||
The variable is created with `StatusCode = BadWaitingForInitialData` and a null value, which it keeps until the first Read or `ISubscribable.OnDataChange` push lands. Note that the production SDK sink (`OtOpcUaNodeManager.EnsureVariable`) currently materialises every variable as `ValueRank = Scalar`, read-only `AccessLevel`, and `Historizing = false` — the `IsArray`/`IsHistorized` intent lives in `DriverAttributeInfo` but is not yet projected onto the SDK node.
|
||||
|
||||
## CapturingBuilder + alarm sink registration
|
||||
|
||||
@@ -39,34 +39,36 @@ The initial value stays `null` with `StatusCode = BadWaitingForInitialData` unti
|
||||
|
||||
## NodeId scheme
|
||||
|
||||
All nodes live in the driver's namespace (not a shared `ns=1`). Browse paths are driver-defined:
|
||||
All nodes share the server's single custom namespace (`NamespaceIndex`); NodeIds are string identifiers, not numeric. The string values come from the source rows / driver references — there is no per-driver namespace prefix:
|
||||
|
||||
| Node type | NodeId format | Example |
|
||||
| Node type | NodeId (string identifier) | Example |
|
||||
|---|---|---|
|
||||
| Driver root | `ns;s={DriverInstanceId}` | `urn:OtOpcUa:galaxy-01;s=galaxy-01` |
|
||||
| Folder | `ns;s={parent}/{browseName}` | `ns;s=galaxy-01/Area_001` |
|
||||
| Variable | `ns;s={DriverAttributeInfo.FullName}` | `ns;s=DelmiaReceiver_001.DownloadPath` |
|
||||
| Alarm condition | `ns;s={FullReference}.Condition` | `ns;s=DelmiaReceiver_001.Temperature.Condition` |
|
||||
| Shared root | `OtOpcUa` | `OtOpcUa` |
|
||||
| UNS Area / Line / Equipment folder | the Config-DB `UnsAreaId` / `UnsLineId` / `EquipmentId` | `EQ_Press_07` |
|
||||
| Galaxy tag variable | the MXAccess reference (`Phase7Applier` uses `GalaxyTagPlan.MxAccessRef`) | `DelmiaReceiver_001.DownloadPath` |
|
||||
| Equipment tag variable | the driver full reference from `DriverAttributeInfo.FullName` | driver-specific |
|
||||
|
||||
For Galaxy the `FullName` stays in the legacy `tag_name.AttributeName` format; Modbus uses `unit:register:type`; AB CIP uses the native `program:tag.member` path; etc. — the shape is the driver's choice.
|
||||
For Galaxy the variable `FullName` is the `tag_name.AttributeName` MXAccess reference; AB CIP uses `tag.Name`, or `tag.Name.member` for UDT members; the shape is the driver's choice. Browse-path resolution (OPC UA `TranslateBrowsePathsToNodeIds`) is the canonical way clients map a browse path to one of these flat NodeIds.
|
||||
|
||||
## Per-driver hierarchy examples
|
||||
|
||||
- **Galaxy Proxy**: walks the DB-snapshot hierarchy (`GalaxyProxyDriver.DiscoverAsync`), streams Area objects as folders and non-area objects as variable-bearing folders, marks `IsAlarm = true` on attributes that have an `AlarmExtension` primitive. The v1 two-pass primitive-grouping logic is retained inside the Galaxy driver.
|
||||
- **Galaxy**: `GalaxyDriver.DiscoverAsync` delegates to `GalaxyDiscoverer` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Browse/GalaxyDiscoverer.cs`), which walks the hierarchy from `IGalaxyHierarchySource` — one folder per Galaxy object (browse name = `contained_name`, falling back to `tag_name`), one variable per dynamic attribute (full reference = `tag_name.AttributeName`). It copies the gateway-supplied `IsAlarm` flag through to `DriverAttributeInfo` and, for alarm-bearing attributes, calls `MarkAsAlarmCondition` with the five sub-attribute refs built by `AlarmRefBuilder`.
|
||||
- **Modbus**: streams one folder per device, one variable per register range from `ModbusDriverOptions`. No alarm surface.
|
||||
- **AB CIP**: uses `AbCipTemplateCache` to enumerate user-defined types, streams a folder per program with variables keyed on the native tag path.
|
||||
- **OPC UA Client**: re-exposes a remote server's address space — browses the upstream and relays nodes through the builder.
|
||||
- **AB CIP**: `AbCipDriver.DiscoverAsync` emits an `AbCip` root, then a folder per configured device. Pre-declared tags become variables under the device folder; UDT (`Structure`) tags fan out into a sub-folder with one variable per member; when controller browse is enabled, `IAbCipTagEnumerator` adds discovered tags under a `Discovered/` sub-folder. (`AbCipTemplateCache` caches UDT layouts for the libplctag enumerator.)
|
||||
- **OPC UA Client**: re-exposes a remote server's address space — `OpcUaClientDriver.DiscoverAsync` browses the upstream from `BrowseRoot` into a `Remote` folder (pass 1), then batch-reads DataType/AccessLevel/ValueRank/Historizing per variable before registering them (pass 2).
|
||||
|
||||
See `docs/v2/driver-specs.md` for the per-driver discovery contracts.
|
||||
|
||||
## Rediscovery
|
||||
|
||||
Drivers that implement `IRediscoverable` fire `OnRediscoveryNeeded` when their backend signals a change (Galaxy: `time_of_last_deploy` advance; TwinCAT: symbol-version-changed; OPC UA Client: server namespace change). Core re-runs `DiscoverAsync` and diffs — see `docs/IncrementalSync.md`. Static drivers (Modbus, S7) don't implement `IRediscoverable`; their address space only changes when a new generation is published from the Config DB.
|
||||
Drivers that implement `IRediscoverable` fire `OnRediscoveryNeeded` when their backend signals a change. Galaxy's `DeployWatcher` raises it when the observed `time_of_last_deploy` advances; TwinCAT raises it on the ADS symbol-version-changed signal (`DeviceSymbolVersionInvalid`, error 1809). Core re-runs `DiscoverAsync` and diffs — see `docs/IncrementalSync.md`. Drivers that don't implement `IRediscoverable` (Modbus, S7, OPC UA Client) only change their address space when a new generation is published from the Config DB.
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — orchestration + `CapturingBuilder`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`, `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — OPC UA materialization (write-only sink fed by the actor system)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — materialises the UNS folder hierarchy + Galaxy tags into the sink
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/EquipmentNodeWalker.cs` — walks Config-DB Equipment-namespace rows into the builder
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IAddressSpaceBuilder.cs` — builder contract
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs` — driver discovery capability
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverAttributeInfo.cs` — per-attribute descriptor
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
# Alarm Historian — store-and-forward SQLite sink
|
||||
|
||||
Reference for `ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian`
|
||||
([`src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/)),
|
||||
the durable local queue that historizes alarm transitions to AVEVA Historian
|
||||
without ever blocking the alarm engine or operator actions.
|
||||
|
||||
This is the *sink mechanics* doc. For how the three alarm sources converge on
|
||||
the OPC UA Part 9 surface and which alarms route here, see
|
||||
[AlarmTracking.md](AlarmTracking.md). For the historian client that drains this
|
||||
queue, see [DriverLifecycle.md](DriverLifecycle.md#ihistoriandatasource--server-side-historian-read-surface)
|
||||
and [ServiceHosting.md](ServiceHosting.md).
|
||||
|
||||
---
|
||||
|
||||
## Why store-and-forward
|
||||
|
||||
Scripted alarms (and any future non-Galaxy `IAlarmSource`, e.g. AB CIP ALMD)
|
||||
must reach AVEVA Historian, but the historian sidecar can be slow, busy, or
|
||||
disconnected. The sink decouples the alarm engine from historian reachability:
|
||||
every qualifying transition is committed to a **local SQLite queue first**, and
|
||||
a background drain worker forwards rows to the historian on a backoff-aware
|
||||
cadence. Operator acks and alarm-state transitions are never blocked waiting on
|
||||
the historian.
|
||||
|
||||
> Galaxy-native alarms with `$Alarm*` extensions reach AVEVA Historian directly
|
||||
> via System Platform's `HistorizeToAveva` toggle — they do **not** flow through
|
||||
> this sink. This path is exclusively for non-Galaxy alarm producers.
|
||||
|
||||
---
|
||||
|
||||
## Contracts
|
||||
|
||||
All in
|
||||
[`IAlarmHistorianSink.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/IAlarmHistorianSink.cs)
|
||||
unless noted.
|
||||
|
||||
- **`IAlarmHistorianSink`** — the intake contract. `EnqueueAsync(evt, ct)`
|
||||
durably enqueues an event and returns as soon as the queue row is committed
|
||||
(fire-and-forget from the engine's perspective; the sink must not block the
|
||||
emitting thread). `GetStatus()` returns a `HistorianSinkStatus` snapshot.
|
||||
- **`NullAlarmHistorianSink`** — the no-op default for tests and deployments
|
||||
that don't historize alarms. It is the default DI binding (registered in the
|
||||
Runtime's `AddOtOpcUaRuntime`); production overrides it with
|
||||
`SqliteStoreAndForwardSink`.
|
||||
- **`AlarmHistorianEvent`**
|
||||
([`AlarmHistorianEvent.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/AlarmHistorianEvent.cs))
|
||||
— the source-agnostic event record: `AlarmId`, `EquipmentPath` (UNS path,
|
||||
doubles as Historian's SourceNode), `AlarmName`, `AlarmTypeName` (Part 9
|
||||
subtype), `Severity`, `EventKind` (free-form transition string —
|
||||
"Activated"/"Cleared"/"Acknowledged"/etc.), `Message`, `User`, `Comment`,
|
||||
`TimestampUtc`.
|
||||
- **`IAlarmHistorianWriter`** — what the drain worker delegates writes to.
|
||||
`WriteBatchAsync(batch, ct)` returns one `HistorianWriteOutcome` per event,
|
||||
in order. Production binds this to `WonderwareHistorianClient` (the AVEVA
|
||||
Historian sidecar IPC client).
|
||||
- **`HistorianWriteOutcome`** — per-event drain result: `Ack` (persisted,
|
||||
remove from queue), `RetryPlease` (transient failure — leave queued, retry
|
||||
after backoff), `PermanentFail` (malformed/unrecoverable — move to
|
||||
dead-letter).
|
||||
- **`HistorianSinkStatus`** — diagnostic snapshot surfaced to the AdminUI and
|
||||
`/healthz`: `QueueDepth`, `DeadLetterDepth`, `LastDrainUtc`, `LastSuccessUtc`,
|
||||
`LastError`, `DrainState`, and `EvictedCount`.
|
||||
- **`HistorianDrainState`** — `Disabled` / `Idle` / `Draining` / `BackingOff`.
|
||||
|
||||
---
|
||||
|
||||
## SqliteStoreAndForwardSink
|
||||
|
||||
[`SqliteStoreAndForwardSink.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/SqliteStoreAndForwardSink.cs)
|
||||
is the production `IAlarmHistorianSink`. Construction takes a SQLite database
|
||||
path, an `IAlarmHistorianWriter`, a logger, and optional `batchSize` (default
|
||||
100), `capacity` (default 1,000,000), `deadLetterRetention` (default 30 days),
|
||||
and a test clock.
|
||||
|
||||
### Queue table
|
||||
|
||||
The sink owns one SQLite table (created on construction, WAL journal mode):
|
||||
|
||||
```sql
|
||||
CREATE TABLE Queue (
|
||||
RowId INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
AlarmId TEXT NOT NULL,
|
||||
EnqueuedUtc TEXT NOT NULL,
|
||||
PayloadJson TEXT NOT NULL, -- JSON-serialized AlarmHistorianEvent
|
||||
AttemptCount INTEGER NOT NULL DEFAULT 0,
|
||||
LastAttemptUtc TEXT NULL,
|
||||
LastError TEXT NULL,
|
||||
DeadLettered INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IX_Queue_Drain ON Queue (DeadLettered, RowId);
|
||||
```
|
||||
|
||||
`EnqueueAsync` does a single `INSERT` on the hot path. To avoid a
|
||||
`SELECT COUNT(*)` on every enqueue, the sink keeps an in-memory non-dead-lettered
|
||||
row counter (seeded at startup, kept current by every mutation, and re-synced
|
||||
from storage every 10,000 enqueues to defend against drift). SQLite writer
|
||||
contention is handled via `PRAGMA busy_timeout=5000` + WAL so an enqueue/drain
|
||||
collision waits out the file lock instead of failing fast.
|
||||
|
||||
### Drain worker
|
||||
|
||||
`StartDrainLoop(tickInterval)` starts a **self-rescheduling one-shot
|
||||
`System.Threading.Timer`** (the loop is not started automatically, so tests can drive
|
||||
`DrainOnceAsync` deterministically). Each tick:
|
||||
|
||||
1. Purges aged dead-lettered rows past the retention window.
|
||||
2. Reads up to `batchSize` non-dead-lettered rows in `RowId` order.
|
||||
3. Rows with un-deserializable payloads are dead-lettered immediately (by their
|
||||
own `RowId`) so they can't stall the queue head.
|
||||
4. The remaining batch is handed to `IAlarmHistorianWriter.WriteBatchAsync`, and
|
||||
each outcome is applied in one transaction: `Ack` deletes the row,
|
||||
`PermanentFail` flips its `DeadLettered` flag, `RetryPlease` bumps its attempt
|
||||
count and leaves it queued.
|
||||
5. The timer re-arms its next due-time to `max(tickInterval, currentBackoff)`.
|
||||
|
||||
**Backoff ladder** (applied to the timer's next due-time, so a historian outage
|
||||
genuinely slows the drain cadence): 1s → 2s → 5s → 15s → 60s cap. Any
|
||||
`RetryPlease` outcome — or a writer exception, or a writer cardinality violation
|
||||
(outcome count ≠ event count) — bumps the backoff and sets `DrainState =
|
||||
BackingOff`; a clean batch resets it. The async-void timer callback is fully
|
||||
guarded: a fault is logged and recorded into `GetStatus()` rather than lost as
|
||||
an unobserved task exception.
|
||||
|
||||
### Durability bound (important)
|
||||
|
||||
**The durability guarantee is bounded by `capacity` (default 1,000,000 rows).**
|
||||
When the non-dead-lettered queue reaches capacity, `EnqueueAsync` evicts the
|
||||
oldest non-dead-lettered rows (oldest `RowId` first) to make room, logs a WARN,
|
||||
and increments `HistorianSinkStatus.EvictedCount`. Under a sustained historian
|
||||
outage, accepted alarm events can therefore be dropped before delivery. A
|
||||
non-zero `EvictedCount` is a data-loss signal that requires operator attention —
|
||||
it surfaces silent loss without log scraping.
|
||||
|
||||
### Dead-letter + operator recovery
|
||||
|
||||
`PermanentFail` and corrupt-payload rows are retained in-place with
|
||||
`DeadLettered = 1` for the retention window (default 30 days) so operators can
|
||||
inspect them before the sweeper purges them. `RetryDeadLettered()` is the
|
||||
operator action (from the AdminUI) that clears the dead-letter flag and attempt
|
||||
count on every dead-lettered row, returning them to the regular queue with a
|
||||
fresh backoff.
|
||||
|
||||
---
|
||||
|
||||
## Runtime wiring
|
||||
|
||||
Production routes alarm transitions through the Akka cluster. The
|
||||
`HistorianAdapterActor`
|
||||
([`Runtime/Historian/HistorianAdapterActor.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Historian/HistorianAdapterActor.cs))
|
||||
bridges messages from the scripted-alarm actor into the sink's `EnqueueAsync`,
|
||||
fire-and-forget so the actor loop is never blocked on historian reachability.
|
||||
The `WonderwareHistorianClient` is the `IAlarmHistorianWriter` the drain worker
|
||||
delegates to. See [ServiceHosting.md](ServiceHosting.md) for the sidecar setup.
|
||||
|
||||
---
|
||||
|
||||
## See also
|
||||
|
||||
- [AlarmTracking.md](AlarmTracking.md) — the three alarm sources and the OPC UA
|
||||
Part 9 surface; which alarms route to this sink.
|
||||
- [DriverLifecycle.md](DriverLifecycle.md) — `IHistorianDataSource` (the
|
||||
historian *read* surface; this page covers the *write* path) and the
|
||||
`WonderwareHistorianClient`.
|
||||
- [ScriptedAlarms.md](ScriptedAlarms.md) — the scripted-alarm engine that emits
|
||||
most events into this sink.
|
||||
- [ServiceHosting.md](ServiceHosting.md) — the optional Wonderware historian
|
||||
sidecar.
|
||||
+20
-18
@@ -13,7 +13,7 @@ historical reference.
|
||||
|----------------------------------|--------------------------|------|
|
||||
| **Galaxy MxAccess (driver-native)** | `GalaxyDriver : IAlarmSource` | gateway → worker → MxAccess alarm sink → `MX_EVENT_FAMILY_ON_ALARM_TRANSITION` → `EventPump` → driver `OnAlarmEvent` → `AlarmConditionService` |
|
||||
| **Galaxy sub-attribute fallback** | `IWritable` writes to `$Alarm*` sub-attributes | gateway data subscription → driver `OnDataChange` → `DriverNodeManager` ConditionSink → `AlarmConditionService` |
|
||||
| **Scripted alarms** | `Phase7EngineComposer` | server-side script evaluator → `Phase7EngineComposer.RouteToHistorianAsync` + `AlarmConditionService` |
|
||||
| **Scripted alarms** | `Phase7Composer` | server-side script evaluator → `ScriptedAlarmActor` transitions → `HistorianAdapterActor` → `IAlarmHistorianSink` |
|
||||
|
||||
All three converge on the alarm-state actor — in v2 the OPC UA Part 9 state
|
||||
machine lives inside `ScriptedAlarmActor`
|
||||
@@ -104,23 +104,25 @@ calls.
|
||||
Scripted alarms (and any future non-Galaxy `IAlarmSource` like
|
||||
AB CIP ALMD) route to AVEVA Historian via the Wonderware sidecar:
|
||||
|
||||
- `Phase7Composer.ResolveHistorianSink` resolves an
|
||||
`IAlarmHistorianWriter` from either a driver that natively
|
||||
implements it or the DI-registered `WonderwareHistorianClient`
|
||||
(the sidecar IPC client). Driver-provided wins when both are
|
||||
present.
|
||||
- `IAlarmHistorianSink` is the DI-registered intake contract. The
|
||||
default binding is `NullAlarmHistorianSink` (registered in
|
||||
`ServiceCollectionExtensions.AddOtOpcUaRuntime`). Production
|
||||
deployments override it with `SqliteStoreAndForwardSink` wrapping
|
||||
`WonderwareHistorianClient` (the AVEVA Historian sidecar IPC client)
|
||||
— see [ServiceHosting.md](ServiceHosting.md) for the sidecar setup.
|
||||
- `SqliteStoreAndForwardSink` queues each transition to a local
|
||||
SQLite database and drains in the background via the resolved
|
||||
writer. **The durability guarantee is bounded**: the queue capacity
|
||||
defaults to 1,000,000 rows; under a sustained historian outage,
|
||||
older non-dead-lettered rows are evicted (oldest first) to make
|
||||
room for new events. The `HistorianSinkStatus.EvictedCount` counter
|
||||
surfaces lifetime eviction events to the Admin UI
|
||||
`/alarms/historian` diagnostics page so operators can detect silent
|
||||
data loss without log scraping.
|
||||
- Sidecar (PR C.1 + C.2) forwards the events to `aahClientManaged`'s
|
||||
alarm-event write API; the live SDK call site is pinned during
|
||||
PR D.1's deploy-rig validation.
|
||||
SQLite database and drains in the background via an
|
||||
`IAlarmHistorianWriter`. **The durability guarantee is bounded**: the
|
||||
queue capacity defaults to 1,000,000 rows; under a sustained
|
||||
historian outage, older non-dead-lettered rows are evicted (oldest
|
||||
first) to make room for new events. The `HistorianSinkStatus.EvictedCount`
|
||||
counter surfaces lifetime eviction events so operators can detect
|
||||
silent data loss without log scraping.
|
||||
- `HistorianAdapterActor`
|
||||
(`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Historian/HistorianAdapterActor.cs`)
|
||||
bridges Akka cluster messages from `ScriptedAlarmActor` into the
|
||||
sink's `EnqueueAsync`; fire-and-forget so the actor loop is never
|
||||
blocked on historian reachability.
|
||||
|
||||
Galaxy-native alarms with `$Alarm*` extensions reach AVEVA Historian
|
||||
directly via System Platform's `HistorizeToAveva` toggle on the
|
||||
@@ -133,4 +135,4 @@ exclusively for non-Galaxy alarm producers.
|
||||
- v1 archive: [docs/v1/AlarmTracking.md](v1/AlarmTracking.md)
|
||||
- Galaxy driver: [docs/drivers/Galaxy.md](drivers/Galaxy.md)
|
||||
- Phase 7 scripting + alarming: [docs/v2/implementation/phase-7-scripting-and-alarming.md](v2/implementation/phase-7-scripting-and-alarming.md)
|
||||
- Security + ACL: [docs/Security.md](Security.md)
|
||||
- Security + ACL: [docs/security.md](security.md)
|
||||
|
||||
+2
-2
@@ -219,7 +219,7 @@ otopcua-cli historyread -u opc.tcp://localhost:4840/OtOpcUa \
|
||||
| `Count` | | `AggregateFunction_Count` |
|
||||
| `Start` | `first` | `AggregateFunction_Start` |
|
||||
| `End` | `last` | `AggregateFunction_End` |
|
||||
| `StandardDeviation` | `stddev`, `stdev` | `AggregateFunction_StandardDeviationSample` |
|
||||
| `StandardDeviation` | `stddev`, `stdev` | `AggregateFunction_StandardDeviationPopulation` |
|
||||
|
||||
### alarms
|
||||
|
||||
@@ -261,7 +261,7 @@ Application URI: urn:localhost:OtOpcUa:instance1
|
||||
|
||||
## Testing
|
||||
|
||||
The Client CLI has 52 unit tests covering option parsing, service invocation, output formatting, and cleanup behavior:
|
||||
The Client CLI has 77 unit tests covering option parsing, service invocation, output formatting, and cleanup behavior:
|
||||
|
||||
```bash
|
||||
dotnet test tests/Client/ZB.MOM.WW.OtOpcUa.Client.CLI.Tests
|
||||
|
||||
+2
-2
@@ -65,7 +65,7 @@ The top bar provides the endpoint URL, Connect, and Disconnect buttons. The **Co
|
||||
|
||||
### Settings Persistence
|
||||
|
||||
Connection settings are saved to `{LocalAppData}/OtOpcUaClient/settings.json` after each successful connection and on window close. Dev boxes upgrading from a pre-task-#208 build still have the legacy `LmxOpcUaClient/` folder on disk; `ClientStoragePaths` in `Client.Shared` moves it to the canonical path on first launch so existing trusted certs + saved settings persist without operator action. The settings are reloaded on next launch, including:
|
||||
Connection settings are saved to `{LocalAppData}/OtOpcUaClient/settings.json` after each successful connection, on disconnect, and on window close. Dev boxes upgrading from a pre-task-#208 build still have the legacy `LmxOpcUaClient/` folder on disk; `ClientStoragePaths` in `Client.Shared` moves it to the canonical path on first launch so existing trusted certs + saved settings persist without operator action. The settings are reloaded on next launch, including:
|
||||
|
||||
- All connection parameters
|
||||
- Active subscription node IDs (restored after reconnection)
|
||||
@@ -100,7 +100,7 @@ Select a node in the browse tree to auto-read its current value. The tab display
|
||||
- Status code (e.g., `0x00000000 (Good)`)
|
||||
- Source and server timestamps
|
||||
|
||||
To write a value, enter the new value and click Send. The service reads the current value first to determine the target type, then converts and writes.
|
||||
To write a value, enter the new value and click Write. The shared `OpcUaClientService.WriteValueAsync` pre-reads the node's current value to determine its type, then calls `ValueConverter.ConvertValue` to produce a typed value client-side before sending a typed `DataValue` to the server. Type resolution happens in the client, not on the server.
|
||||
|
||||
## Subscriptions Tab
|
||||
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
# Configuration Reference
|
||||
|
||||
This is the live configuration reference for the OtOpcUa Host (`src/Server/ZB.MOM.WW.OtOpcUa.Host/`). It enumerates the `appsettings*.json` sections, the bound Options classes, and the `OTOPCUA_*` / sim-endpoint environment variables — every entry grounded in source.
|
||||
|
||||
Two related concerns get their own dedicated pages and are **only summarised + linked** here, not duplicated:
|
||||
|
||||
- **Transport security, OPC UA authentication, LDAP, data-/control-plane authorization** → [`security.md`](security.md)
|
||||
- **Redundancy + the `Cluster` section** → [`Redundancy.md`](Redundancy.md)
|
||||
|
||||
## How configuration is layered
|
||||
|
||||
The Host (`Program.cs`) loads `appsettings.json`, then overlays a **per-role** file chosen from the cluster roles:
|
||||
|
||||
- A single role → `appsettings.{role}.json` (e.g. `appsettings.driver.json`, `appsettings.admin.json`).
|
||||
- Both roles → `appsettings.admin-driver.json` (roles joined with `-`, ordinal-sorted).
|
||||
- Independently of the roles, `appsettings.{ASPNETCORE_ENVIRONMENT}.json` (e.g. `appsettings.Development.json`) is layered on by the host builder.
|
||||
|
||||
All role overlays are **optional** — the base `appsettings.json` plus the Options-class C# defaults are enough to boot. The roles themselves come from the `OTOPCUA_ROLES` env var (see [`ServiceHosting.md`](ServiceHosting.md) and the table below).
|
||||
|
||||
The checked-in `appsettings*.json` files are deliberately thin: they carry only `Serilog` and the `Security:Ldap` overlay. Everything else (`OpcUa`, `Cluster`, `ConnectionStrings`/`ConfigDb`) binds from the Options-class defaults documented below unless an operator adds the section explicitly or supplies the corresponding environment variable.
|
||||
|
||||
---
|
||||
|
||||
## `appsettings` sections
|
||||
|
||||
### `Serilog`
|
||||
|
||||
- **Purpose:** logging. Console + rolling daily file sink, layered with the shared `ZB.MOM.WW.Telemetry` enrichers (`AddZbSerilog` in `Program.cs`).
|
||||
- **Where bound:** `builder.AddZbSerilog(...)` reads `Serilog` from configuration (`ReadFrom.Configuration`).
|
||||
- **Checked-in shape** (`appsettings.json`): `Using` = `[ "Serilog.Sinks.Console", "Serilog.Sinks.File" ]`, `WriteTo` = a `Console` sink and a `File` sink (`path: logs/otopcua-.log`, `rollingInterval: Day`). Role overlays add `MinimumLevel` / `Override` blocks (e.g. `Opc.Ua: Debug`, `Akka: Information`).
|
||||
|
||||
### `OpcUa`
|
||||
|
||||
- **Purpose:** the OPC UA server endpoint identity, listening port, PKI, transport-security profiles, and redundancy peer advertising.
|
||||
- **Options class:** `OpcUaApplicationHostOptions` — `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs`.
|
||||
- **Bound by:** `AddValidatedOptions<OpcUaApplicationHostOptions, OpcUaApplicationHostOptionsValidator>(config, "OpcUa")` in `Program.cs` (driver-role only). Validated fail-fast at startup by `OpcUaApplicationHostOptionsValidator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/Configuration/OpcUaApplicationHostOptionsValidator.cs`).
|
||||
|
||||
| Key | Type | Default | Meaning |
|
||||
|---|---|---|---|
|
||||
| `ApplicationName` | string | `OtOpcUa` | Server application name. Required (validated). |
|
||||
| `ApplicationUri` | string | `urn:OtOpcUa` | Server application URI. Must be unique per redundancy node. Required. |
|
||||
| `ProductUri` | string | `https://zb.com/otopcua` | Product URI. Not validated. |
|
||||
| `OpcUaPort` | int | `4840` | Binary endpoint listen port. Validated as a port. |
|
||||
| `PublicHostname` | string | `0.0.0.0` | Hostname/IP advertised in endpoint descriptions. Required. |
|
||||
| `ApplicationConfigPath` | string? | `null` | Optional path to an application config XML; loaded instead of building from defaults. |
|
||||
| `PkiStoreRoot` | string | `pki` | Root of the PKI hierarchy (`own`/`issuer`/`trusted`/`rejected` substores created under it). Required. See [`security.md`](security.md). |
|
||||
| `EnabledSecurityProfiles` | list of `OpcUaSecurityProfile` | `[None, Basic256Sha256Sign, Basic256Sha256SignAndEncrypt]` | Transport-security profiles, one endpoint per entry. Must contain ≥1. Profile detail in [`security.md`](security.md). |
|
||||
| `AutoAcceptUntrustedClientCertificates` | bool | `false` | Auto-trust unknown client certs on first connect (dev convenience). Not validated. See [`security.md`](security.md). |
|
||||
| `PeerApplicationUris` | list of string | `[]` (empty) | Partner node `ApplicationUri`s published in `Server.ServerArray` for redundancy discovery. See [`Redundancy.md`](Redundancy.md). |
|
||||
|
||||
> **Transport security profiles** (the values in `EnabledSecurityProfiles` — `None`, `Basic256Sha256Sign`, `Basic256Sha256SignAndEncrypt`) and the PKI trust flow are documented in full in [`security.md`](security.md). This page does not duplicate them.
|
||||
|
||||
### `Security`
|
||||
|
||||
- **Purpose:** Admin-UI and OPC UA authentication. Three subsections, each its own Options class:
|
||||
|
||||
| Subsection | Options class (`SectionName`) | Purpose |
|
||||
|---|---|---|
|
||||
| `Security:Ldap` | `LdapOptions` — `src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/LdapOptions.cs` | LDAP bind for Admin cookie login + OPC UA UserName tokens. Bound by `AddValidatedOptions<LdapOptions, LdapOptionsValidator>` in `Program.cs`. |
|
||||
| `Security:Jwt` | `JwtOptions` — `src/Server/ZB.MOM.WW.OtOpcUa.Security/Jwt/JwtOptions.cs` | Signing config for the JWT minted at `/auth/token` for **external** consumers (OPC UA clients / automation). |
|
||||
| `Security:Cookie` | `OtOpcUaCookieOptions` — `src/Server/ZB.MOM.WW.OtOpcUa.Security/CookieOptions.cs` | The Admin-UI auth cookie (`AddOtOpcUaAuth` copies these onto `CookieAuthenticationOptions`). |
|
||||
|
||||
**`Security:Ldap` — see [`security.md`](security.md) for the full field-by-field reference and bind-flow.** The checked-in role overlays set only `DevStubMode` and `Transport`; the remaining `LdapOptions` fields (`Enabled`, `Server`, `Port`, `AllowInsecure`, `SearchBase`, `ServiceAccountDn`, `ServiceAccountPassword`, `GroupAttribute`, `DisplayNameAttribute`, `UserNameAttribute`, `GroupToRole`) are covered there.
|
||||
|
||||
**`Security:Jwt`** key fields (`JwtOptions`):
|
||||
|
||||
| Key | Type | Default | Meaning |
|
||||
|---|---|---|---|
|
||||
| `SigningKey` | string | `""` | HS256 signing key; must be ≥32 bytes UTF-8. Set from your secret store — never commit a value. |
|
||||
| `Issuer` | string | `otopcua` | JWT issuer. |
|
||||
| `Audience` | string | `otopcua` | JWT audience. |
|
||||
| `ExpiryMinutes` | int | `15` | Token lifetime. |
|
||||
|
||||
**`Security:Cookie`** key fields (`OtOpcUaCookieOptions`):
|
||||
|
||||
| Key | Type | Default | Meaning |
|
||||
|---|---|---|---|
|
||||
| `Name` | string | `ZB.MOM.WW.OtOpcUa.Auth` | Auth cookie name. Changing it invalidates existing sessions on next deploy. |
|
||||
| `ExpiryMinutes` | int | `30` | Idle sliding-window length. |
|
||||
| `RequireHttpsCookie` | bool | `true` | `SecurePolicy = Always`. Set `false` only for plain-HTTP local dev (emits a startup Warning). |
|
||||
|
||||
> Authentication, data-plane authorization (`NodeAcl` / `PermissionTrie`), and control-plane Admin roles are all in [`security.md`](security.md).
|
||||
|
||||
### `Cluster`
|
||||
|
||||
- **Purpose:** Akka.NET cluster identity, transport, and roles — the backbone of redundancy.
|
||||
- **Options class:** `AkkaClusterOptions` (`SectionName = "Cluster"`) — `src/Core/ZB.MOM.WW.OtOpcUa.Cluster/AkkaClusterOptions.cs`. Bound by `AddOtOpcUaCluster(config)` in `Program.cs`.
|
||||
|
||||
| Key | Type | Default | Meaning |
|
||||
|---|---|---|---|
|
||||
| `SystemName` | string | `otopcua` | Akka actor-system name. |
|
||||
| `Hostname` | string | `0.0.0.0` | Bind hostname. |
|
||||
| `Port` | int | `4053` | Cluster transport port. |
|
||||
| `PublicHostname` | string | `127.0.0.1` | Hostname advertised in cluster gossip; must be reachable by peers. |
|
||||
| `SeedNodes` | string[] | `[]` | Seed nodes for bootstrapping. |
|
||||
| `Roles` | string[] | `[]` | Cluster roles for this node. When empty, falls back to `OTOPCUA_ROLES`. Allowed values: `admin`, `driver`, `dev`. |
|
||||
|
||||
> The full redundancy model (ServiceLevel tiers, split-brain, peer discovery) is in [`Redundancy.md`](Redundancy.md). The OPC UA peer-URI advertising lives in the `OpcUa:PeerApplicationUris` key above.
|
||||
|
||||
### `ConnectionStrings` → `ConfigDb`
|
||||
|
||||
- **Purpose:** the central Config DB connection string. **Required for every role** — `Program.cs` calls `AddOtOpcUaConfigDb` unconditionally.
|
||||
- **Bound by:** `AddOtOpcUaConfigDb(config)` (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/ServiceCollectionExtensions.cs`). The connection-string name constant is `ConnectionStringName = "ConfigDb"`, read via `configuration.GetConnectionString("ConfigDb")`. If absent, startup throws with a message pointing to either `appsettings.json` or the `OTOPCUA_CONFIG_CONNECTION` env var.
|
||||
- **Shape:** standard `ConnectionStrings:ConfigDb` SQL Server connection string. There is no checked-in default in the thin `appsettings*.json` — supply it per environment.
|
||||
|
||||
The Config DB itself (the EF Core `OtOpcUaConfigDbContext`, entities, draft/publish generations, `NodeAcl`, `LdapGroupRoleMapping`, migrations) is the durable home for the fleet's drivers, UNS hierarchy, ACLs, and audit log. For the **full schema** see [`docs/v2/config-db-schema.md`](v2/config-db-schema.md). This page does not duplicate it.
|
||||
|
||||
### Galaxy / MxAccess driver config (`DriverConfig` JSON, not `appsettings`)
|
||||
|
||||
The Galaxy/MxAccess connection settings are **not an `appsettings` section.** They are driver-instance options stored in the `DriverConfig` JSON column of the Config DB (edited via the Admin UI), bound to `GalaxyDriverOptions` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts/GalaxyDriverOptions.cs`, namespace `...Driver.Galaxy.Config`). It decomposes into nested records:
|
||||
|
||||
| Record | Key fields (default) | Meaning |
|
||||
|---|---|---|
|
||||
| `GalaxyGatewayOptions` (`Gateway`) | `Endpoint`; `ApiKeySecretRef`; `UseTls` (`true`); `CaCertificatePath` (`null`); `ConnectTimeoutSeconds` (`10`); `DefaultCallTimeoutSeconds` (`30`); `StreamTimeoutSeconds` (`0` = unlimited) | mxaccessgw gateway connection. `ApiKeySecretRef` supports `env:NAME` / `file:PATH` / `dev:KEY` / literal forms (resolved at `InitializeAsync`); prefer `env:`/`file:` in production. Never store a cleartext key. |
|
||||
| `GalaxyMxAccessOptions` (`MxAccess`) | `ClientName`; `PublishingIntervalMs` (`1000`); `WriteUserId` (`0` = anonymous); `EventPumpChannelCapacity` (`50000`) | MXAccess client identity + tuning. `ClientName` **must be unique per OtOpcUa instance** (redundancy pairs enforce this). |
|
||||
| `GalaxyRepositoryOptions` (`Repository`) | `DiscoverPageSize` (`5000`); `WatchDeployEvents` (`true`) | Galaxy Repository browse paging + deploy-event watching. |
|
||||
| `GalaxyReconnectOptions` (`Reconnect`) | `InitialBackoffMs` (`500`); `MaxBackoffMs` (`30000`); `ReplayOnSessionLost` (`true`) | In-driver reconnect-supervisor backoff. |
|
||||
| (top-level) | `ProbeTimeoutSeconds` (`30`, range 1–60) | AdminUI Test-Connect probe timeout. |
|
||||
|
||||
> The `OTOPCUA_GALAXY_*` environment variables that v1's in-process `Galaxy.Host` consumed **no longer live in this repo** — they moved into the separately-installed mxaccessgw gateway's own config (see the v1 archive pointer in `docs/README.md` and the Galaxy overview at [`docs/drivers/Galaxy.md`](drivers/Galaxy.md)). The only Galaxy connection secret this repo touches is the gateway API key via `ApiKeySecretRef` above.
|
||||
|
||||
### Historian config (env-driven sidecar)
|
||||
|
||||
The Wonderware Historian runs as a supervised sidecar process whose configuration arrives **entirely through environment variables**, not an `appsettings` section. The sidecar entry point (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`) reads them at spawn time. See the `OTOPCUA_HISTORIAN_*` rows in the environment-variable table below. The in-process client-side options POCO is `WonderwareHistorianClientOptions` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/WonderwareHistorianClientOptions.cs`): `PipeName`, `SharedSecret`, `PeerName` (`OtOpcUa`), `ConnectTimeout` (default 10s), `CallTimeout` (default 30s), `ProbeTimeoutSeconds` (`15`).
|
||||
|
||||
---
|
||||
|
||||
## Environment variables
|
||||
|
||||
All names are read in this repo's source via `Environment.GetEnvironmentVariable(...)` unless noted otherwise. Defaults shown are the in-source fallbacks.
|
||||
|
||||
### Host / cluster / Config DB
|
||||
|
||||
| Variable | Read by | Effect / default |
|
||||
|---|---|---|
|
||||
| `OTOPCUA_ROLES` | `src/Server/ZB.MOM.WW.OtOpcUa.Host/Program.cs` (`RoleParser.Parse`) | Comma-separated cluster roles for the node (`admin`, `driver`, `dev`). Drives the conditional wiring and the per-role appsettings overlay. Used when `Cluster:Roles` is empty. |
|
||||
| `OTOPCUA_CONFIG_CONNECTION` | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/DesignTimeDbContextFactory.cs` (design-time / `dotnet ef` only) | Read at **design time** by `DesignTimeDbContextFactory.cs` for `dotnet ef` migrations. At **runtime** the server resolves the connection string from `ConnectionStrings:ConfigDb` (env form: `ConnectionStrings__ConfigDb`) via `configuration.GetConnectionString("ConfigDb")` in `ServiceCollectionExtensions.cs` — `OTOPCUA_CONFIG_CONNECTION` appears there only as a hint in an error message, not via `GetEnvironmentVariable`. No credential is embedded in source. |
|
||||
| `OTOPCUA_ALLOWED_SID` | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs` | SID of the server principal allowed to connect to the historian sidecar's named pipe (passed by the supervisor at spawn). Required — sidecar throws if unset. |
|
||||
| `ASPNETCORE_ENVIRONMENT` | ASP.NET host builder (framework) | Selects `appsettings.{Environment}.json` (e.g. `Development`). |
|
||||
|
||||
### Historian sidecar (`OTOPCUA_HISTORIAN_*`)
|
||||
|
||||
All read in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`.
|
||||
|
||||
| Variable | Effect / default |
|
||||
|---|---|
|
||||
| `OTOPCUA_HISTORIAN_PIPE` | Named-pipe name the sidecar listens on. Required (throws if unset). |
|
||||
| `OTOPCUA_HISTORIAN_SECRET` | Per-process shared secret verified in the pipe Hello frame. Required (throws if unset). |
|
||||
| `OTOPCUA_HISTORIAN_ENABLED` | `true` opens the real Wonderware SDK connection; any other value (or unset) → pipe-only mode (smoke/IPC tests). Default: pipe-only. |
|
||||
| `OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED` | `false` disables the alarm-event writer (sidecar rejects `WriteAlarmEvents`). Default `true` (when `ENABLED=true`). |
|
||||
| `OTOPCUA_HISTORIAN_INTEGRATED` | `false` → SQL auth (use `USER`/`PASS`); any other value → integrated security. Default: integrated. |
|
||||
| `OTOPCUA_HISTORIAN_SERVER` | Historian server hostname. Default `localhost`. |
|
||||
| `OTOPCUA_HISTORIAN_SERVERS` | Comma-separated multi-node server list (overrides single `SERVER` when set). |
|
||||
| `OTOPCUA_HISTORIAN_PORT` | Historian port. Default `32568`. |
|
||||
| `OTOPCUA_HISTORIAN_USER` | SQL username (when not integrated). |
|
||||
| `OTOPCUA_HISTORIAN_PASS` | SQL password (when not integrated). Never commit a value. |
|
||||
| `OTOPCUA_HISTORIAN_TIMEOUT_SEC` | Command timeout (seconds). Default `30`. |
|
||||
| `OTOPCUA_HISTORIAN_MAX_VALUES` | Max values returned per read. Default `10000`. |
|
||||
| `OTOPCUA_HISTORIAN_COOLDOWN_SEC` | Failure cooldown (seconds). Default `60`. |
|
||||
|
||||
### Driver integration-test / fixture sim endpoints
|
||||
|
||||
These are consumed by the driver **integration-test fixtures** (under `tests/Drivers/...IntegrationTests/`), not by the production server. Each overrides the simulator endpoint a fixture TCP-probes; defaults point at the shared Docker host `10.100.0.35` (see `CLAUDE.md` Docker Workflow), except `OTOPCUA_FOCAS_SIM_ENDPOINT`, which defaults to `localhost`.
|
||||
|
||||
| Variable | Read by (fixture) | Default |
|
||||
|---|---|---|
|
||||
| `MODBUS_SIM_ENDPOINT` | `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.IntegrationTests/ModbusSimulatorFixture.cs` | `10.100.0.35:5020` |
|
||||
| `AB_SERVER_ENDPOINT` | `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/AbServerFixture.cs` | `10.100.0.35:44818` |
|
||||
| `S7_SIM_ENDPOINT` | `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/Snap7ServerFixture.cs` | `10.100.0.35:1102` (non-privileged; not S7-standard 102) |
|
||||
| `OPCUA_SIM_ENDPOINT` | `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/OpcPlcFixture.cs` | `opc.tcp://10.100.0.35:50000` |
|
||||
| `OTOPCUA_FOCAS_SIM_ENDPOINT` | `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/FocasSimFixture.cs` | `localhost:8193` |
|
||||
|
||||
> Additional harness/parity/soak env vars (other `OTOPCUA_FOCAS_*`, `OTOPCUA_PARITY_*`, `OTOPCUA_SOAK_*`, `OTOPCUA_HARNESS_USE_SQL`) exist only in the test/parity/soak harnesses, not in production source, and are out of scope for this reference.
|
||||
|
||||
---
|
||||
|
||||
## See also
|
||||
|
||||
- [`security.md`](security.md) — transport security, OPC UA authentication, LDAP (`Security:Ldap`), data-plane ACLs, control-plane roles.
|
||||
- [`Redundancy.md`](Redundancy.md) — the `Cluster` section in the context of warm/hot redundancy, ServiceLevel, peer discovery.
|
||||
- [`ServiceHosting.md`](ServiceHosting.md) — role-based host wiring and `OTOPCUA_ROLES`.
|
||||
- [`docs/drivers/Galaxy.md`](drivers/Galaxy.md) — Galaxy/MxAccess driver overview.
|
||||
- [`docs/v2/config-db-schema.md`](v2/config-db-schema.md) — the full Config DB schema.
|
||||
@@ -4,7 +4,7 @@ Ad-hoc probe / read / write / subscribe tool for SLC 500 / MicroLogix 1100 /
|
||||
MicroLogix 1400 / PLC-5 devices, talking to the **same** `AbLegacyDriver` the
|
||||
OtOpcUa server uses (libplctag PCCC back-end).
|
||||
|
||||
Third of four driver test-client CLIs. Shares `Driver.Cli.Common` with the
|
||||
Third of six driver test-client CLIs. Shares `Driver.Cli.Common` with the
|
||||
others.
|
||||
|
||||
## Build + run
|
||||
|
||||
@@ -5,8 +5,8 @@ through the **same** `ModbusDriver` the OtOpcUa server uses. Mirrors the v1
|
||||
OPC UA `otopcua-cli` shape so the muscle memory carries over: drop to a shell,
|
||||
point at a PLC, watch registers move.
|
||||
|
||||
First of four driver test-client CLIs (Modbus → AB CIP → AB Legacy → S7 →
|
||||
TwinCAT). Built on the shared `ZB.MOM.WW.OtOpcUa.Driver.Cli.Common` library
|
||||
First of six driver test-client CLIs (Modbus → AB CIP → AB Legacy → S7 →
|
||||
TwinCAT → FOCAS). Built on the shared `ZB.MOM.WW.OtOpcUa.Driver.Cli.Common` library
|
||||
so each downstream CLI inherits verbose/log wiring + snapshot formatting
|
||||
without copy-paste.
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ Ad-hoc probe / read / write / subscribe tool for Siemens S7-300 / S7-400 /
|
||||
S7-1200 / S7-1500 (and compatible soft-PLCs) over S7comm / ISO-on-TCP port 102.
|
||||
Uses the **same** `S7Driver` the OtOpcUa server does (S7.Net under the hood).
|
||||
|
||||
Fourth of four driver test-client CLIs.
|
||||
Fourth of six driver test-client CLIs.
|
||||
|
||||
## Build + run
|
||||
|
||||
@@ -58,6 +58,12 @@ otopcua-s7-cli probe -h 192.168.1.31 -c S7300 --slot 2 -a DB1.DBW0
|
||||
|
||||
### `read`
|
||||
|
||||
Supported types: `Bool`, `Byte`, `Int16`, `UInt16`, `Int32`, `UInt32`, `Float32`.
|
||||
`Int64`, `UInt64`, `Float64`, `String`, and `DateTime` are defined in `S7DataType` but
|
||||
**not yet implemented** — the driver rejects them at initialisation and any read or write
|
||||
returns `BadNotSupported`
|
||||
(`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/S7Driver.cs` — `UnimplementedDataTypes` set).
|
||||
|
||||
```powershell
|
||||
# DB word
|
||||
otopcua-s7-cli read -h 192.168.1.30 -a DB1.DBW0 -t Int16
|
||||
@@ -67,9 +73,6 @@ otopcua-s7-cli read -h 192.168.1.30 -a DB1.DBD4 -t Float32
|
||||
|
||||
# Merker bit
|
||||
otopcua-s7-cli read -h 192.168.1.30 -a M0.0 -t Bool
|
||||
|
||||
# 80-char S7 string
|
||||
otopcua-s7-cli read -h 192.168.1.30 -a DB10.STRING[0] -t String --string-length 80
|
||||
```
|
||||
|
||||
### `write`
|
||||
|
||||
@@ -5,7 +5,7 @@ TwinCAT 3 runtimes via ADS. Uses the **same** `TwinCATDriver` the OtOpcUa
|
||||
server does (`Beckhoff.TwinCAT.Ads` package). Native ADS notifications by
|
||||
default; `--poll-only` falls back to the shared `PollGroupEngine`.
|
||||
|
||||
Fifth (final) of the driver test-client CLIs.
|
||||
Fifth of six driver test-client CLIs.
|
||||
|
||||
## Build + run
|
||||
|
||||
@@ -55,7 +55,7 @@ Per-command flags:
|
||||
| Flag | Default | Purpose |
|
||||
|---|---|---|
|
||||
| `-s` / `--symbol` | **required** | Symbol path to probe (e.g. `MAIN.bRunning`) |
|
||||
| `--type` | `DInt` | Declared data type — see the [Data types](#data-types) list |
|
||||
| `-t` / `--type` | `DInt` | Declared data type — see the [Data types](#data-types) list |
|
||||
|
||||
```powershell
|
||||
# Local TwinCAT 3, probe a canonical global
|
||||
|
||||
+9
-5
@@ -35,6 +35,10 @@ Every driver CLI exposes the same four verbs:
|
||||
push where available (TwinCAT ADS notifications) and falls back to polling
|
||||
(`PollGroupEngine`) where the protocol has no push (Modbus, AB, S7, FOCAS).
|
||||
|
||||
The TwinCAT CLI adds a fifth verb, **`browse`** — it walks the controller's
|
||||
symbol table via the driver's `DiscoverAsync` path and prints every symbol the
|
||||
atomic-type mapper recognises. No other driver CLI ships `browse`.
|
||||
|
||||
## Shared infrastructure
|
||||
|
||||
All six CLIs depend on `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/`:
|
||||
@@ -88,8 +92,8 @@ their flag values to the already-shipped driver.
|
||||
## Tracking
|
||||
|
||||
Tasks #249 / #250 / #251 shipped the original five. The FOCAS CLI followed
|
||||
alongside the Tier-C isolation work on task #220 — no CLI-level test
|
||||
project (hardware-gated). 122 unit tests cumulative across the first five
|
||||
(16 shared-lib + 106 CLI-specific) — run
|
||||
`dotnet test tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests` +
|
||||
`tests/ZB.MOM.WW.OtOpcUa.Driver.*.Cli.Tests` to re-verify.
|
||||
alongside the Tier-C isolation work on task #220. Every CLI — FOCAS included —
|
||||
ships its own unit-test project under `tests/Drivers/Cli/`, alongside the shared
|
||||
`tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests`. Re-verify with
|
||||
`dotnet test tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests` and
|
||||
each per-family `tests/Drivers/Cli/...Cli.Tests` project.
|
||||
|
||||
@@ -0,0 +1,295 @@
|
||||
# Driver Lifecycle & Server Infrastructure Contracts
|
||||
|
||||
Reference for the server-side infrastructure interfaces that surround a
|
||||
driver but are **not** driver *capabilities* (read/write/subscribe/etc.,
|
||||
documented in [ReadWriteOperations.md](ReadWriteOperations.md) and the
|
||||
per-driver pages). These contracts live in
|
||||
[`src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/)
|
||||
so they carry no behavior — concrete implementations live in the driver
|
||||
projects, the Runtime, and the ControlPlane. Each subsection below gives the
|
||||
purpose, the key members, and where it is implemented/used.
|
||||
|
||||
The capability interfaces a driver opts into (`IReadable`, `IWritable`,
|
||||
`ITagDiscovery`, `ISubscribable`, `IAlarmSource`, `IHistoryProvider`,
|
||||
`IHostConnectivityProbe`, `IPerCallHostResolver`, `IRediscoverable`) are
|
||||
covered elsewhere and discovered by the server via `is`-checks on the
|
||||
`IDriver` instance. The interfaces here are the *plumbing* the server uses to
|
||||
**create**, **probe**, **supervise**, **report on**, and **configure** those
|
||||
drivers, plus the server-side historian read surface.
|
||||
|
||||
---
|
||||
|
||||
## IDriverFactory — creating drivers from config rows
|
||||
|
||||
[`Core.Abstractions/IDriverFactory.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverFactory.cs)
|
||||
|
||||
Abstraction over the process-wide driver registry. The Runtime consumes this
|
||||
instead of the concrete registry so the Runtime project does not pull in
|
||||
`ZB.MOM.WW.OtOpcUa.Core` (which would drag in Polly + driver hosting).
|
||||
|
||||
Members:
|
||||
|
||||
- `IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson)`
|
||||
— returns a new driver for the given type, or `null` when no factory is
|
||||
registered for that type (missing assembly, typo). The `DriverHostActor`
|
||||
logs and skips the row rather than failing the whole apply.
|
||||
- `IReadOnlyCollection<string> SupportedTypes` — driver-type names this
|
||||
factory can materialise; mostly for diagnostics and logs.
|
||||
|
||||
Implementations:
|
||||
|
||||
- `NullDriverFactory` (same file) returns `null` from every `TryCreate` and
|
||||
exposes zero supported types. Bound when no concrete driver assemblies have
|
||||
been registered (Mac dev path, smoke tests); the deployment becomes a no-op.
|
||||
- `DriverFactoryRegistry`
|
||||
([`Core/Hosting/DriverFactoryRegistry.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistry.cs))
|
||||
is the real process-singleton registry keyed by `DriverInstance.DriverType`
|
||||
(case-insensitive). Each driver project ships a `Register(...)` extension;
|
||||
`Register` records the factory **and** the driver's stability
|
||||
[`DriverTier`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTier.cs)
|
||||
(defaults to Tier A). Registering the same type twice throws.
|
||||
- `DriverFactoryRegistryAdapter`
|
||||
([`Core/Hosting/DriverFactoryRegistryAdapter.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistryAdapter.cs))
|
||||
bridges the registry to the `IDriverFactory` abstraction.
|
||||
|
||||
Wiring: `DriverFactoryBootstrap.AddOtOpcUaDriverFactories`
|
||||
([`Host/Drivers/DriverFactoryBootstrap.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.Host/Drivers/DriverFactoryBootstrap.cs))
|
||||
registers the singleton registry, runs every driver assembly's `Register`
|
||||
extension, then binds `IDriverFactory` to the adapter. It must run **before**
|
||||
`AddAkka` so the Runtime can resolve `IDriverFactory` when spawning the
|
||||
`DriverHostActor`
|
||||
([`Runtime/Drivers/DriverHostActor.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs)).
|
||||
The registry is skipped on admin-only nodes (they never run drivers); the
|
||||
probe set is the exception — see [IDriverProbe](#idriverprobe--test-connect).
|
||||
|
||||
---
|
||||
|
||||
## IDriverProbe — Test Connect
|
||||
|
||||
[`Core.Abstractions/IDriverProbe.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverProbe.cs)
|
||||
|
||||
A cheap test-connect probe for one driver type, backing the AdminUI **Test
|
||||
Connect** button. An implementation deserializes a driver-config JSON, attempts
|
||||
a cheap connection (TCP open, OPC UA session, gRPC ping — whatever the driver's
|
||||
native protocol supports), and reports success/failure with latency. **Probes
|
||||
must not mutate persistent state**: the AdminUI invokes them against the
|
||||
transient config in the typed form, not against the persisted `DriverInstance`
|
||||
row.
|
||||
|
||||
Members:
|
||||
|
||||
- `string DriverType { get; }` — the `DriverInstance.DriverType` string this
|
||||
probe handles; used for DI lookup.
|
||||
- `Task<DriverProbeResult> ProbeAsync(string configJson, TimeSpan timeout, CancellationToken ct)`
|
||||
— never throws on connection failure; returns a result with `Ok = false`
|
||||
and a message instead.
|
||||
- `DriverProbeResult(bool Ok, string? Message, TimeSpan? Latency)` — outcome
|
||||
record (`Message` is `null` on success; `Latency` is `null` on failure).
|
||||
|
||||
Implementations: every driver ships a `*DriverProbe` in its driver project
|
||||
(e.g.
|
||||
[`Driver.Modbus/ModbusDriverProbe.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverProbe.cs)
|
||||
does a bare socket open/close), plus the Wonderware historian's
|
||||
`WonderwareHistorianDriverProbe`.
|
||||
|
||||
Flow: the AdminUI's `AdminProbeService`
|
||||
([`AdminUI/Clients/AdminProbeService.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Clients/AdminProbeService.cs))
|
||||
dispatches a `TestDriverConnect` message through `IAdminOperationsClient` to the
|
||||
cluster-singleton `AdminOperationsActor`
|
||||
([`ControlPlane/AdminOperations/AdminOperationsActor.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs)),
|
||||
which holds the probes keyed by `DriverType` and invokes the matching one
|
||||
(timeout clamped to `[1, 60]` seconds). Because the admin singleton is
|
||||
admin-pinned, the probe set must be registered on admin nodes too — `Program.cs`
|
||||
calls `AddOtOpcUaDriverProbes` in the `hasAdmin` block, and
|
||||
`AddOtOpcUaDriverFactories` registers the probe set for fused admin+driver nodes.
|
||||
|
||||
---
|
||||
|
||||
## IDriverSupervisor — Tier C out-of-process recycle
|
||||
|
||||
[`Core.Abstractions/IDriverSupervisor.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs)
|
||||
|
||||
The process-level supervisor contract a **Tier C** (out-of-process) driver's
|
||||
topology provides. Its concern is restarting the out-of-process Host when a
|
||||
hard fault is detected (memory breach, wedge, scheduled recycle window). Tier
|
||||
A/B drivers run in-process and do **not** have a supervisor — recycling them
|
||||
would kill every OPC UA session and every co-hosted driver. The Core.Stability
|
||||
layer only invokes this interface after asserting the tier.
|
||||
|
||||
Members:
|
||||
|
||||
- `string DriverInstanceId { get; }` — the driver instance this supervisor
|
||||
governs.
|
||||
- `Task RecycleAsync(string reason, CancellationToken cancellationToken)` —
|
||||
request a terminate+restart of the Host process; implementations are
|
||||
expected to be idempotent under repeat calls during an in-flight recycle.
|
||||
|
||||
Callers (both in
|
||||
[`Core/Stability/`](../src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/)):
|
||||
|
||||
- `ScheduledRecycleScheduler`
|
||||
([`Core/Stability/ScheduledRecycleScheduler.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs))
|
||||
— opt-in periodic recycle. A `TickAsync` method advanced by the caller's
|
||||
ambient scheduler decides whether the configured interval has elapsed and, if
|
||||
so, drives `RecycleAsync`. Its constructor throws unless the tier is C, making
|
||||
in-process misuse structurally impossible.
|
||||
- `MemoryRecycle`
|
||||
([`Core/Stability/MemoryRecycle.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs))
|
||||
— on a memory hard-breach, calls `RecycleAsync` (when a supervisor is wired).
|
||||
|
||||
---
|
||||
|
||||
## IDriverHealthPublisher — health pub/sub sink
|
||||
|
||||
[`Core.Abstractions/IDriverHealthPublisher.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverHealthPublisher.cs)
|
||||
|
||||
A sink for driver-health state-change notifications. Implementations must be
|
||||
non-blocking and safe to call from any thread.
|
||||
|
||||
Member:
|
||||
|
||||
- `void Publish(string clusterId, string driverInstanceId, DriverHealth health, int errorCount5Min)`
|
||||
|
||||
Implementations:
|
||||
|
||||
- `NullDriverHealthPublisher` (same file) is the drop-in no-op for tests and
|
||||
dev-stub paths. A `DriverInstanceActor` defaults to it when no publisher is
|
||||
supplied.
|
||||
- `AkkaDriverHealthPublisher`
|
||||
([`Runtime/Drivers/AkkaDriverHealthPublisher.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/AkkaDriverHealthPublisher.cs))
|
||||
is the production binding: it forwards each transition as a
|
||||
`DriverHealthChanged` message onto the cluster-wide `driver-health`
|
||||
Akka DistributedPubSub topic.
|
||||
|
||||
Producer: `DriverInstanceActor`
|
||||
([`Runtime/Drivers/DriverInstanceActor.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs))
|
||||
calls `Publish` when a driver's health transitions. The published snapshot is
|
||||
consumed AdminUI-side and surfaced through the driver-status panel (read
|
||||
in-process by the AdminUI bridge rather than dialing its own hub).
|
||||
|
||||
---
|
||||
|
||||
## IDriverConfigEditor — custom AdminUI config editor (plug-point)
|
||||
|
||||
[`Core.Abstractions/IDriverConfigEditor.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverConfigEditor.cs)
|
||||
|
||||
An **optional** plug-point a driver can implement to provide a custom AdminUI
|
||||
editor for its `DriverConfig` JSON. Drivers that don't implement it fall back to
|
||||
the generic JSON editor with schema-driven validation. This is the contract
|
||||
between the driver and the Admin Blazor app; the Admin app discovers
|
||||
implementations and slots them into the Driver Detail screen.
|
||||
|
||||
Members:
|
||||
|
||||
- `string DriverType { get; }` — the driver type this editor handles.
|
||||
- `Type EditorComponentType { get; }` — the Razor component type that renders
|
||||
the editor (returned as `Type` so `Core.Abstractions` needs no Blazor
|
||||
reference).
|
||||
|
||||
Status: this is a forward-looking plug-point. No driver ships a concrete
|
||||
`IDriverConfigEditor` today — every driver uses the generic JSON editor — so
|
||||
the contract is defined but currently has no implementations.
|
||||
|
||||
---
|
||||
|
||||
## IHistorianDataSource — server-side historian read surface
|
||||
|
||||
[`Core.Abstractions/Historian/IHistorianDataSource.cs`](../src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/Historian/IHistorianDataSource.cs)
|
||||
|
||||
The server-side historian read surface. Registered with the server's history
|
||||
router and resolved **per OPC UA namespace**, independent of any driver's
|
||||
lifecycle. This is distinct from the driver capability `IHistoryProvider`:
|
||||
|
||||
- `IHistoryProvider` is a *driver capability* — the server dispatches to it via
|
||||
the driver instance.
|
||||
- `IHistorianDataSource` is a *server registration* — the server resolves it by
|
||||
namespace and calls it directly, so one historian (e.g. Wonderware) can serve
|
||||
many drivers' nodes, and drivers can restart without dropping history
|
||||
availability.
|
||||
|
||||
The interface extends `IDisposable` and declares the full read surface as
|
||||
**required** members (unlike `IHistoryProvider`, where at-time/event reads are
|
||||
optional default-impl methods so legacy drivers can stay raw-only):
|
||||
|
||||
- `ReadRawAsync(fullReference, startUtc, endUtc, maxValuesPerNode, ct)` — raw
|
||||
historical samples over a time range.
|
||||
- `ReadProcessedAsync(fullReference, startUtc, endUtc, interval, aggregate, ct)`
|
||||
— interval-bucketed aggregates (average/min/max/count); an empty bucket
|
||||
returns a `BadNoData` sample.
|
||||
- `ReadAtTimeAsync(fullReference, timestampsUtc, ct)` — one sample per requested
|
||||
timestamp (OPC UA HistoryReadAtTime); the returned list matches the requested
|
||||
length and order, with gaps returned as Bad-quality snapshots.
|
||||
- `ReadEventsAsync(sourceName, startUtc, endUtc, maxEvents, ct)` — historical
|
||||
alarm/event records (OPC UA HistoryReadEvents); `sourceName` is `null` to
|
||||
return all sources. `maxEvents` is a signed `int` so a non-positive value is a
|
||||
"use the backend's default cap" sentinel.
|
||||
- `GetHealthSnapshot()` — point-in-time health snapshot for diagnostics and
|
||||
dashboards; pure observation, never blocks on backend I/O.
|
||||
|
||||
All values use the shared `DataValueSnapshot` / `HistoricalEvent` shapes;
|
||||
backend-specific quality/type encodings are translated to OPC UA `StatusCode`
|
||||
uints inside the data source.
|
||||
|
||||
Implementations:
|
||||
|
||||
- `WonderwareHistorianClient`
|
||||
([`Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs))
|
||||
— the .NET 10 client that talks to the Wonderware historian sidecar over a
|
||||
named pipe. It implements both `IHistorianDataSource` (read paths) and
|
||||
`IAlarmHistorianWriter` (the alarm-event drain target; see
|
||||
[AlarmHistorian.md](AlarmHistorian.md)).
|
||||
- `HistorianDataSource`
|
||||
([`Driver.Historian.Wonderware/Backend/HistorianDataSource.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Backend/HistorianDataSource.cs))
|
||||
— the in-process backend implementation behind the sidecar.
|
||||
|
||||
The optional Wonderware historian sidecar setup is described in
|
||||
[ServiceHosting.md](ServiceHosting.md).
|
||||
|
||||
---
|
||||
|
||||
## Commons — shared cross-cutting primitives
|
||||
|
||||
[`src/Core/ZB.MOM.WW.OtOpcUa.Commons/`](../src/Core/ZB.MOM.WW.OtOpcUa.Commons/)
|
||||
|
||||
`ZB.MOM.WW.OtOpcUa.Commons` is the low-level shared library that the Runtime,
|
||||
ControlPlane, AdminUI, and OPC UA server projects all reference. It holds
|
||||
cross-cutting primitives with no driver- or host-specific behavior, so the
|
||||
heavier projects can share message contracts and value types without taking a
|
||||
dependency on each other. It references only `Akka` and the internal
|
||||
`ZB.MOM.WW.Audit` package.
|
||||
|
||||
Folders:
|
||||
|
||||
- **`Messages/`** — Akka message contracts grouped by concern (`Admin`,
|
||||
`Alerts`, `Deploy`, `Drivers`, `Fleet`, `Logging`, `Redundancy`). These are
|
||||
the wire/inter-actor messages — e.g. `Messages/Admin/TestDriverConnect.cs`
|
||||
(Test Connect request, see [IDriverProbe](#idriverprobe--test-connect)) and
|
||||
`Messages/Drivers/DriverHealthChanged.cs` (the driver-health pub/sub payload,
|
||||
see [IDriverHealthPublisher](#idriverhealthpublisher--health-pubsub-sink)).
|
||||
- **`Interfaces/`** — cluster-facing client contracts such as
|
||||
`IAdminOperationsClient`, `IClusterRoleInfo`, and `IFleetDiagnosticsClient`.
|
||||
- **`Types/`** — strongly-typed identifier value types: `CorrelationId`,
|
||||
`DeploymentId`, `ExecutionId`, `NodeId`, `RevisionHash`.
|
||||
- **`Browsing/`** — live-browse abstractions (`BrowseNode`, `IBrowseSession`,
|
||||
`IDriverBrowser`) backing the AdminUI address pickers.
|
||||
- **`Engines/`** — evaluator seams (`IScriptedAlarmEvaluator`,
|
||||
`IVirtualTagEvaluator`, `IAlarmActorStateStore`) consumed by the
|
||||
[VirtualTags](VirtualTags.md) / [ScriptedAlarms](ScriptedAlarms.md) engines.
|
||||
- **`OpcUa/`** — deferred-publish seams (`IOpcUaAddressSpaceSink`,
|
||||
`IServiceLevelPublisher` and their `Deferred*` no-op stand-ins) so address-space
|
||||
and [ServiceLevel](Redundancy.md) writes can be wired late.
|
||||
- **`Observability/`** — `OtOpcUaTelemetry` (the shared ActivitySource/metrics
|
||||
surface).
|
||||
|
||||
---
|
||||
|
||||
## See also
|
||||
|
||||
- [ReadWriteOperations.md](ReadWriteOperations.md) — the driver *capability*
|
||||
interfaces (read/write/subscribe) and resilience pipeline.
|
||||
- [ServiceHosting.md](ServiceHosting.md) — role gating, the Akka cluster, and
|
||||
the optional Wonderware historian sidecar.
|
||||
- [AlarmHistorian.md](AlarmHistorian.md) — the store-and-forward SQLite alarm
|
||||
sink that drains to `IAlarmHistorianWriter`.
|
||||
- [Redundancy.md](Redundancy.md) — driver stability tiers in the redundancy
|
||||
context.
|
||||
@@ -1,6 +1,6 @@
|
||||
# Incremental Sync
|
||||
|
||||
Two distinct change-detection paths feed the running server: driver-backend rediscovery (Galaxy's `time_of_last_deploy`, TwinCAT's symbol-version-changed, OPC UA Client's upstream namespace change) and generation-level config publishes from the Admin UI. Both flow into re-runs of `ITagDiscovery.DiscoverAsync`, but they originate differently.
|
||||
Two distinct change-detection paths feed the running server: driver-backend rediscovery (Galaxy's `time_of_last_deploy`, TwinCAT's symbol-version-changed) and generation-level config publishes from the Admin UI. Both flow into re-runs of `ITagDiscovery.DiscoverAsync`, but they originate differently.
|
||||
|
||||
## Driver-backend rediscovery — IRediscoverable
|
||||
|
||||
@@ -18,9 +18,8 @@ The driver fires the event with a reason string (for the diagnostic log) and an
|
||||
|
||||
Drivers that implement the capability today:
|
||||
|
||||
- **Galaxy** — polls `galaxy.time_of_last_deploy` in the Galaxy repository DB and fires on change. This is Galaxy-internal change detection, not the platform-wide mechanism.
|
||||
- **TwinCAT** — observes ADS symbol-version-changed notifications (`0x0702`).
|
||||
- **OPC UA Client** — subscribes to the upstream server's `Server/NamespaceArray` change notifications.
|
||||
- **Galaxy** — `DeployWatcher` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Browse/DeployWatcher.cs`) subscribes to the mxaccessgw gRPC stream (`IGalaxyDeployWatchSource.WatchAsync`) and fires on a new `time_of_last_deploy` value. The gateway polls the Galaxy repository DB internally; the driver side is event-driven.
|
||||
- **TwinCAT** — observes ADS symbol-version-changed notifications (ADS error `DeviceSymbolVersionInvalid`, decimal 1809 / `0x0711`). Note: legacy Beckhoff documentation sometimes cites `0x0702` (`DeviceInvalidGroup`) — that is a transcription error; the correct code is `0x0711` per `TwinCATStatusMapper.AdsSymbolVersionChanged` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATStatusMapper.cs:35`).
|
||||
|
||||
Static drivers (Modbus, S7, AB CIP, AB Legacy, FOCAS) do not implement `IRediscoverable` — their tags only change when a new generation is published from the Config DB. Core sees the absence of the interface and skips change-detection wiring for those drivers (decision #54).
|
||||
|
||||
@@ -49,7 +48,7 @@ Exceptions during teardown are swallowed per decision #12 — a driver throw mus
|
||||
|
||||
## Scope hint
|
||||
|
||||
When `RediscoveryEventArgs.ScopeHint` is non-null (e.g. a folder path), Core restricts the diff to that subtree. This matters for Galaxy Platform-scoped deployments where a `time_of_last_deploy` advance may only affect one platform's subtree, and for OPC UA Client where an upstream change may be localized. Null scope falls back to a full-tree diff.
|
||||
When `RediscoveryEventArgs.ScopeHint` is non-null (e.g. a folder path), Core restricts the diff to that subtree. This matters for Galaxy Platform-scoped deployments where a `time_of_last_deploy` advance may only affect one platform's subtree. Null scope falls back to a full-tree diff.
|
||||
|
||||
## Virtual tags in the rebuild
|
||||
|
||||
|
||||
+55
-45
@@ -1,89 +1,99 @@
|
||||
# OPC UA Server
|
||||
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs`) hosts the OPC UA stack and exposes one browsable subtree per registered driver. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/`) hosts the OPC UA stack and exposes a browsable address space built from the registered drivers. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
|
||||
In v2 the Server and Admin processes were fused into a single role-gated `ZB.MOM.WW.OtOpcUa.Host` binary. Which subsystems start (OPC UA endpoint, Admin UI, control plane, driver runtime) is decided by the `OTOPCUA_ROLES` gate, not by running separate executables. See `docs/ServiceHosting.md` for the role model.
|
||||
|
||||
## Composition
|
||||
|
||||
`OtOpcUaServer` subclasses the OPC Foundation `StandardServer` and wires:
|
||||
`OtOpcUaSdkServer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs`) subclasses the OPC Foundation `StandardServer` and wires a single custom node manager:
|
||||
|
||||
- A `DriverHost` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs`) which registers drivers and holds the per-instance `IDriver` references.
|
||||
- One `DriverNodeManager` per registered driver (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`), constructed in `CreateMasterNodeManager`. Each manager owns its own namespace URI (`urn:OtOpcUa:{DriverInstanceId}`) and exposes the driver as a subtree under the standard `Objects` folder.
|
||||
- A `CapabilityInvoker` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs`) per driver instance, keyed on `(DriverInstanceId, HostName, DriverCapability)` against the shared `DriverResiliencePipelineBuilder`. Every Read/Write/Discovery/Subscribe/HistoryRead/AlarmSubscribe call on the driver flows through this invoker so the Polly pipeline (retry / timeout / breaker / bulkhead) applies. The OTOPCUA0001 Roslyn analyzer enforces the wrapping at compile time.
|
||||
- An `IUserAuthenticator` (LDAP in production, injected stub in tests) for `UserName` token validation in the `ImpersonateUser` hook.
|
||||
- Optional `AuthorizationGate` + `NodeScopeResolver` (Phase 6.2) that sit in front of every dispatch call. In lax mode the gate passes through when the identity lacks LDAP groups so existing integration tests keep working; strict mode (`Authorization:StrictMode = true`) denies those cases.
|
||||
- `CreateMasterNodeManager` constructs one `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`) — a `CustomNodeManager2` subclass that owns the writable address space under the namespace `https://zb.com/otopcua/ns` and a single `OtOpcUa` root folder organized under the standard `Objects` folder. It is wrapped in a `MasterNodeManager` with no additional core managers.
|
||||
- `OtOpcUaSdkServer.NodeManager` exposes the live node manager after `StartAsync`, so the hosting layer can wrap it in a `SdkAddressSpaceSink` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs`) and hand it to `OpcUaPublishActor`.
|
||||
|
||||
`OtOpcUaServer.DriverNodeManagers` exposes the materialized list so the hosting layer can walk each one post-start and call `GenericDriverNodeManager.BuildAddressSpaceAsync(manager)` — the manager is passed as its own `IAddressSpaceBuilder`.
|
||||
Address-space population is push-driven: drivers stream discovery and data-change events through the Akka actor system (`DriverInstanceActor` → `OpcUaPublishActor`), and `OpcUaPublishActor` writes them into the node manager through the `IOpcUaAddressSpaceSink` seam. `OtOpcUaNodeManager.EnsureFolder` / `EnsureVariable` materialize the UNS folder + variable hierarchy; `WriteValue` / `WriteAlarmState` push runtime values and fire `ClearChangeMasks` so subscribed clients see updates.
|
||||
|
||||
The driver-agnostic walk that turns a driver's discovery into folder/variable calls lives in `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`): it walks `ITagDiscovery.DiscoverAsync` into an `IAddressSpaceBuilder`, captures alarm-condition sinks for variables flagged via `IVariableHandle.MarkAsAlarmCondition`, subscribes to `IAlarmSource.OnAlarmEvent`, and routes each alarm transition to the sink registered for its `SourceNodeId`.
|
||||
|
||||
The lifecycle facade `OpcUaApplicationHost` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs`) owns the `ApplicationInstance` + `ApplicationConfiguration` lifetime, starts the `StandardServer`, and attaches the `ImpersonateUser` hook (see Session impersonation).
|
||||
|
||||
## Resilience and capability dispatch
|
||||
|
||||
Driver-capability calls (`IReadable.ReadAsync`, `IWritable.WriteAsync`, `ITagDiscovery.DiscoverAsync`, `ISubscribable.SubscribeAsync/UnsubscribeAsync`, the `IHostConnectivityProbe` probe loop, `IAlarmSource` surfaces, and the four `IHistoryProvider` reads) are routed through a `CapabilityInvoker` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs`) so the Polly resilience pipeline (retry / timeout / breaker / bulkhead) applies. There is one invoker per `(DriverInstance, IDriver)` pair; all invokers share the process-singleton `DriverResiliencePipelineBuilder`, which keys pipelines on `(DriverInstanceId, hostName, DriverCapability)`. Per-instance resilience options come from `DriverTypeRegistry` (the driver's tier) plus per-instance JSON overrides parsed from `DriverInstance.ResilienceConfig` by `DriverResilienceOptionsParser`.
|
||||
|
||||
The `OTOPCUA0001` Roslyn analyzer (`src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs`, category `OtOpcUa.Resilience`, severity Warning) flags direct driver-capability calls that bypass the invoker.
|
||||
|
||||
| Capability | Surface | Invoker entry point |
|
||||
|---|---|---|
|
||||
| Read | `IReadable.ReadAsync` | `ExecuteAsync(DriverCapability.Read, host, …)` |
|
||||
| Write | `IWritable.WriteAsync` | `ExecuteWriteAsync(host, isIdempotent, …)` — disables retries for non-idempotent writes per `WriteIdempotentAttribute` / decisions #44-45, #143 |
|
||||
| Discovery | `ITagDiscovery.DiscoverAsync` | `ExecuteAsync(DriverCapability.Discover, host, …)` |
|
||||
| Subscribe / Unsubscribe | `ISubscribable.SubscribeAsync/UnsubscribeAsync` | `ExecuteAsync(DriverCapability.Subscribe, host, …)` |
|
||||
| HistoryRead (raw / processed / at-time / events) | `IHistoryProvider.*Async` | `ExecuteAsync(DriverCapability.HistoryRead, host, …)` |
|
||||
| Alarm subscribe / unsubscribe / acknowledge | `IAlarmSource.SubscribeAlarmsAsync/UnsubscribeAlarmsAsync/AcknowledgeAsync` | via `AlarmSurfaceInvoker` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/AlarmSurfaceInvoker.cs`), which fans out per host |
|
||||
|
||||
The host name fed to the invoker comes from `IPerCallHostResolver.ResolveHost(fullReference)` when the driver implements it (multi-host drivers: AB CIP, Modbus, FOCAS, TwinCAT, AB Legacy resolve per device). Single-host drivers fall back to `DriverInstanceId`, preserving the per-instance pipeline-key semantics (decision #144).
|
||||
|
||||
## Configuration
|
||||
|
||||
Server wiring used to live in `appsettings.json`. It now flows from the SQL Server **Config DB**: `ServerInstance` + `DriverInstance` + `Tag` + `NodeAcl` rows are published as a *generation* via `sp_PublishGeneration` and loaded into the running process by the generation applier. The Admin UI (Blazor Server, `docs/v2/admin-ui.md`) is the operator surface — drafts accumulate edits; `sp_ComputeGenerationDiff` drives the DiffViewer preview; a UNS drag-reorder carries a `DraftRevisionToken` so Confirm re-checks against the current draft and returns 409 if it advanced (decision #161). See `docs/v2/config-db-schema.md` for the schema.
|
||||
Tenant-scoped server wiring flows from the SQL Server **Config DB**, not from `appsettings.json`: `ServerInstance` + `DriverInstance` + `Tag` + `NodeAcl` rows are published as a *generation* by `sp_PublishGeneration` and loaded into the running process by the generation applier. The Admin UI (Blazor Server, `docs/v2/admin-ui.md`) is the operator surface — drafts accumulate edits and `sp_ComputeGenerationDiff` drives the DiffViewer preview before publish. Optimistic concurrency uses each entity's `RowVersion`; a stale edit fails the publish/save rather than silently overwriting. See `docs/v2/config-db-schema.md` for the schema.
|
||||
|
||||
Environmental knobs that aren't per-tenant (bind address, port, PKI path) still live in `appsettings.json` on the Server project; everything tenant-scoped moved to the Config DB.
|
||||
Environmental knobs that aren't per-tenant — bind address, port, PKI store root, security profiles — are supplied to `OpcUaApplicationHostOptions` and resolved from `appsettings.json` on the Host project.
|
||||
|
||||
## Transport
|
||||
|
||||
The server binds one TCP endpoint per `ServerInstance` (default `opc.tcp://0.0.0.0:4840`). The `ApplicationConfiguration` is built programmatically in the `OpcUaApplicationHost` — there are no UA XML files. Security profiles (`None`, `Basic256Sha256-Sign`, `Basic256Sha256-SignAndEncrypt`) are resolved from the `ServerInstance.Security` JSON at startup; the default profile is still `None` for backward compatibility. User token policies (`Anonymous`, `UserName`) are attached based on whether LDAP is configured. See `docs/security.md` for hardening.
|
||||
The server binds a TCP endpoint at `opc.tcp://{PublicHostname}:{OpcUaPort}/OtOpcUa` (defaults `0.0.0.0:4840`). The `ApplicationConfiguration` is built programmatically in `OpcUaApplicationHost.BuildConfigurationAsync` — there are no UA XML files unless `ApplicationConfigPath` is set. Security profiles are listed in `OpcUaApplicationHostOptions.EnabledSecurityProfiles`; by default all three baseline profiles are exposed (`None`, `Basic256Sha256` + Sign, `Basic256Sha256` + SignAndEncrypt) and the SDK publishes one endpoint descriptor per profile. Production deployments typically drop `None`. User token policies (`Anonymous`, `UserName`) are always attached; the `UserName` policy is SDK-encrypted with the server certificate so it works on `None` endpoints too. See `docs/security.md` for hardening.
|
||||
|
||||
## Session impersonation
|
||||
|
||||
`OtOpcUaServer.OnImpersonateUser` handles the three token types:
|
||||
`OpcUaApplicationHost` subscribes to `SessionManager.ImpersonateUser` after `ApplicationInstance.Start`. The handler (`HandleImpersonation`) deals with the token types as follows:
|
||||
|
||||
- `AnonymousIdentityToken` → default anonymous `UserIdentity`.
|
||||
- `UserNameIdentityToken` → `IUserAuthenticator.AuthenticateAsync` validates the credential (`LdapUserAuthenticator` in production). On success, the resolved display name + LDAP-derived roles are wrapped in a `RoleBasedIdentity` that implements `IRoleBearer`. `DriverNodeManager.OnWriteValue` reads these roles via `context.UserIdentity is IRoleBearer` and applies `WriteAuthzPolicy` per write.
|
||||
- Anything else → `BadIdentityTokenInvalid`.
|
||||
- `UserNameIdentityToken` → the password is decrypted, then `IOpcUaUserAuthenticator.AuthenticateUserNameAsync` validates the credential (`LdapUserAuthenticator` in production, a stub in tests). On success a `UserIdentity` carrying the token is attached and the LDAP-derived roles are logged; on failure `ImpersonateEventArgs.IdentityValidationError` is set to `BadIdentityTokenRejected`.
|
||||
- `AnonymousIdentityToken` and X.509 tokens → the handler returns without intervening, so the SDK's default validation stands.
|
||||
|
||||
The Phase 6.2 `AuthorizationGate` runs on top of this baseline: when configured it consults the cluster's permission trie (loaded from `NodeAcl` rows) using the session's `UserAuthorizationState` and can deny Read / HistoryRead / Write / Browse independently per tag. See `docs/v2/acl-design.md`.
|
||||
Decryption failures and authenticator exceptions also map to `BadIdentityTokenRejected`.
|
||||
|
||||
## Dispatch
|
||||
## Authorization
|
||||
|
||||
Every service call the stack hands to `DriverNodeManager` is translated to the driver's capability interface and routed through `CapabilityInvoker`:
|
||||
|
||||
| Service | Capability | Invoker method |
|
||||
|---|---|---|
|
||||
| Read | `IReadable.ReadAsync` | `ExecuteAsync(DriverCapability.Read, host, …)` |
|
||||
| Write | `IWritable.WriteAsync` | `ExecuteWriteAsync(host, isIdempotent, …)` — honors `WriteIdempotentAttribute` (#143) |
|
||||
| CreateMonitoredItems / DeleteMonitoredItems | `ISubscribable.SubscribeAsync/UnsubscribeAsync` | `ExecuteAsync(DriverCapability.Subscribe, host, …)` |
|
||||
| HistoryRead (raw / processed / at-time / events) | `IHistoryProvider.*Async` | `ExecuteAsync(DriverCapability.HistoryRead, host, …)` |
|
||||
| ConditionRefresh / Acknowledge | `IAlarmSource.*Async` | via `AlarmSurfaceInvoker` (fans out per host) |
|
||||
|
||||
The host name fed to the invoker comes from `IPerCallHostResolver.ResolveHost(fullReference)` when the driver implements it (multi-host drivers: AB CIP, Modbus with per-device options). Single-host drivers fall back to `DriverInstanceId`, preserving pre-Phase-6.1 pipeline-key semantics (decision #144).
|
||||
Node-level authorization is backed by a permission trie under `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` (`PermissionTrie`, `PermissionTrieBuilder`, `PermissionTrieCache`, `TriePermissionEvaluator`, `NodeScope`, `UserAuthorizationState`, `AuthorizationDecision`). The trie is built from `NodeAcl` rows and a session's `UserAuthorizationState`, and an `IPermissionEvaluator` can return a per-tag `AuthorizationDecision` for Read / HistoryRead / Write / Browse independently. See `docs/v2/acl-design.md`.
|
||||
|
||||
## Redundancy
|
||||
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyStateActor` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Redundancy/`). Standard OPC UA redundancy nodes (`Server/ServerRedundancy/RedundancySupport`, `ServerUriArray`, `Server/ServiceLevel`) are populated on startup; `ServiceLevel` recomputes whenever any driver's `DriverHealth` changes. The apply-lease mechanism prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyStateActor` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Redundancy/`). The OPC UA `Server/ServiceLevel` node (`VariableIds.Server_ServiceLevel`) is recomputed and republished via `SdkServiceLevelPublisher` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkServiceLevelPublisher.cs`, wired as `IServiceLevelPublisher`) whenever role or driver-health changes; `ServiceLevelCalculator` produces a 0–255 value where higher means more authoritative, so the primary advertises a higher ServiceLevel than the secondary. Clients also read the standard `Server/ServerRedundancy/RedundancySupport` and `Server/ServerRedundancy/ServerUriArray` properties the SDK exposes on the ServerObject. An apply-lease prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
|
||||
Peer endpoints are advertised through the standard `Server.ServerArray` property: `OpcUaApplicationHost` appends `OpcUaApplicationHostOptions.PeerApplicationUris` to `IServerInternal.ServerUris` after start so warm-redundancy clients can discover the partner.
|
||||
|
||||
## Server class hierarchy
|
||||
|
||||
### OtOpcUaServer extends StandardServer
|
||||
### OtOpcUaSdkServer extends StandardServer
|
||||
|
||||
- **`CreateMasterNodeManager`** — Iterates `_driverHost.RegisteredDriverIds`, builds one `DriverNodeManager` per driver with its own `CapabilityInvoker` + resilience options (tier from `DriverTypeRegistry`, per-instance JSON overrides from `DriverInstance.ResilienceConfig` via `DriverResilienceOptionsParser`). The managers are wrapped in a `MasterNodeManager` with no additional core managers.
|
||||
- **`OnServerStarted`** — Hooks `SessionManager.ImpersonateUser` for LDAP auth. Redundancy + server-capability population happens via `OpcUaApplicationHost`.
|
||||
- **`LoadServerProperties`** — Manufacturer `OtOpcUa`, Product `OtOpcUa.Server`, ProductUri `urn:OtOpcUa:Server`.
|
||||
- **`CreateMasterNodeManager`** — Constructs the single `OtOpcUaNodeManager` and wraps it in a `MasterNodeManager` with no extra core managers.
|
||||
- **`NodeManager`** — Public accessor exposing the live `OtOpcUaNodeManager` once the SDK has bootstrapped (null until `CreateMasterNodeManager` runs).
|
||||
|
||||
### ServerCapabilities
|
||||
|
||||
`OpcUaApplicationHost` populates `Server/ServerCapabilities` with `StandardUA2017`, `en` locale, 100 ms `MinSupportedSampleRate`, 4 MB message caps, and per-operation limits (1000 per Read/Write/Browse/TranslateBrowsePaths/MonitoredItems/HistoryRead; 0 for MethodCall/NodeManagement/HistoryUpdate).
|
||||
`ApplicationName`, `ApplicationUri` (`urn:OtOpcUa`), and `ProductUri` (`https://zb.com/otopcua`) come from `OpcUaApplicationHostOptions`, which the `ApplicationConfiguration` is built from in `OpcUaApplicationHost`.
|
||||
|
||||
## Certificate handling
|
||||
|
||||
Certificate stores default to `%LOCALAPPDATA%\OPC Foundation\pki\` (directory-based):
|
||||
Certificate stores are directory-based under `OpcUaApplicationHostOptions.PkiStoreRoot` (default `pki`, relative to the host's working directory):
|
||||
|
||||
| Store | Path suffix |
|
||||
|---|---|
|
||||
| Own | `pki/own` |
|
||||
| Own (application certificate) | `pki/own` |
|
||||
| Trusted issuers | `pki/issuer` |
|
||||
| Trusted peers | `pki/trusted` |
|
||||
| Rejected | `pki/rejected` |
|
||||
|
||||
`Security.AutoAcceptClientCertificates` (default `true`) and `RejectSHA1Certificates` (default `true`) are honored. The server certificate is always created — even for `None`-only deployments — because `UserName` token encryption needs it.
|
||||
`OpcUaApplicationHostOptions.AutoAcceptUntrustedClientCertificates` (default `false`) controls whether unknown client certificates are auto-trusted on first connection; production deployments leave it off and operators promote peers via the Admin UI. The application instance certificate is auto-created (SDK defaults: 2048-bit, 12-month lifetime) on first start against a fresh PKI tree, and the server certificate is always created — even for `None`-only deployments — because `UserName` token encryption needs it.
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs` — `StandardServer` subclass
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle + `ImpersonateUser` hook
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — SDK node manager + write-only address-space sink
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs` — `StandardServer` subclass wiring the single node manager
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle + `ImpersonateUser` hook + ServerArray population
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — `CustomNodeManager2` owning the writable address space
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — `IOpcUaAddressSpaceSink` adapter the actor system pushes into
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — per-driver discovery + dispatch surface
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs` — driver registration
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — Polly pipeline entry point
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkServiceLevelPublisher.cs` — publishes the redundancy `ServiceLevel` node
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — driver-agnostic discovery walk + alarm routing
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs` — process-local driver registration + lifecycle
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — Polly pipeline entry point for capability calls
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/AlarmSurfaceInvoker.cs` — per-host fan-out wrapper for `IAlarmSource`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`)
|
||||
|
||||
+6
-3
@@ -26,19 +26,21 @@ The project was originally called **LmxOpcUa** (a single-driver Galaxy/MXAccess
|
||||
| [OpcUaServer.md](OpcUaServer.md) | Top-level server architecture — Core, driver dispatch, Config DB, generations |
|
||||
| [AddressSpace.md](AddressSpace.md) | `GenericDriverNodeManager` + `ITagDiscovery` + `IAddressSpaceBuilder` |
|
||||
| [ReadWriteOperations.md](ReadWriteOperations.md) | OPC UA Read/Write → `CapabilityInvoker` → `IReadable`/`IWritable` |
|
||||
| [DriverLifecycle.md](DriverLifecycle.md) | Server-side driver lifecycle + infrastructure contracts (`IDriverFactory`, `IDriverProbe`, `IDriverSupervisor`, `IDriverHealthPublisher`, `IDriverConfigEditor`, `IHistorianDataSource`) + the Commons library |
|
||||
| [Subscriptions.md](v1/Subscriptions.md) | Monitored items → `ISubscribable` + per-driver subscription refcount (v1 archive) |
|
||||
| [AlarmTracking.md](v1/AlarmTracking.md) | `IAlarmSource` + `AlarmSurfaceInvoker` + OPC UA alarm conditions (v1 archive) |
|
||||
| [AlarmTracking.md](AlarmTracking.md) | `IAlarmSource` + `AlarmSurfaceInvoker` + OPC UA alarm conditions — native Galaxy alarms end-to-end (live) |
|
||||
| [AlarmTracking.md](v1/AlarmTracking.md) | Original alarm-tracking write-up (v1 archive) |
|
||||
| [AlarmHistorian.md](AlarmHistorian.md) | `Core.AlarmHistorian` store-and-forward SQLite sink — `SqliteStoreAndForwardSink`, `IAlarmHistorianWriter`, dead-letter/retry/eviction |
|
||||
| [DataTypeMapping.md](v1/DataTypeMapping.md) | Per-driver `DriverAttributeInfo` → OPC UA variable types (v1 archive — live mapping is in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Browse/DataTypeMap.cs`) |
|
||||
| [IncrementalSync.md](IncrementalSync.md) | Address-space rebuild on redeploy + `sp_ComputeGenerationDiff` |
|
||||
| [HistoricalDataAccess.md](v1/HistoricalDataAccess.md) | `IHistoryProvider` as a per-driver optional capability (v1 archive) |
|
||||
| [VirtualTags.md](VirtualTags.md) | `Core.Scripting` + `Core.VirtualTags` — Roslyn script sandbox, engine, dispatch alongside driver tags |
|
||||
| [ScriptedAlarms.md](ScriptedAlarms.md) | `Core.ScriptedAlarms` — script-predicate `IAlarmSource` + Part 9 state machine |
|
||||
|
||||
Two Core subsystems are shipped without a dedicated top-level doc; see the section in the linked doc:
|
||||
One Core subsystem is shipped without a dedicated top-level doc; see the section in the linked doc:
|
||||
|
||||
| Project | See |
|
||||
|---------|-----|
|
||||
| `Core.AlarmHistorian` | [AlarmTracking.md](v1/AlarmTracking.md) § Alarm historian sink (v1 archive) |
|
||||
| `Analyzers` (Roslyn OTOPCUA0001) | [security.md](security.md) § OTOPCUA0001 Analyzer |
|
||||
|
||||
### Drivers
|
||||
@@ -55,6 +57,7 @@ For Modbus / S7 / AB CIP / AB Legacy / TwinCAT / FOCAS / OPC UA Client specifics
|
||||
|
||||
| Doc | Covers |
|
||||
|-----|--------|
|
||||
| [Configuration.md](Configuration.md) | Live appsettings + environment-variable reference (current state) |
|
||||
| [Configuration.md](v1/Configuration.md) | appsettings bootstrap + Config DB + Admin UI draft/publish (v1 archive — `OTOPCUA_GALAXY_*` env vars now live in mxaccessgw config) |
|
||||
| [security.md](security.md) | Transport security profiles, LDAP auth, ACL trie, role grants, OTOPCUA0001 analyzer |
|
||||
| [Redundancy.md](Redundancy.md) | `RedundancyCoordinator`, `ServiceLevelCalculator`, apply-lease, Prometheus metrics |
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Read/Write Operations
|
||||
|
||||
`GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) wires the OPC UA stack's per-variable `OnReadValue` and `OnWriteValue` hooks to each driver's `IReadable` and `IWritable` capabilities. Every dispatch flows through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers.
|
||||
The v2 server routes OPC UA Read and Write operations to each driver's `IReadable` and `IWritable` capabilities through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers. The per-variable `OnReadValue` and `OnWriteValue` hooks described in the sections below live in `DriverNodeManager` (the planned ADR-002 Phase 7 Stream G successor to the v1 `DriverNodeManager`); `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) handles address-space population and alarm routing during discovery. The current `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`) is a push-model `CustomNodeManager2` that receives values from the Akka actor layer via `WriteValue`; OPC UA client reads return the cached pushed value.
|
||||
|
||||
## Driver vs virtual dispatch
|
||||
|
||||
@@ -52,7 +52,7 @@ Array-element writes via OPC UA `IndexRange` are driver-specific. The OPC UA sta
|
||||
|
||||
## HistoryRead
|
||||
|
||||
`DriverNodeManager.HistoryReadRawModified`, `HistoryReadProcessed`, `HistoryReadAtTime`, and `HistoryReadEvents` route through the driver's `IHistoryProvider` capability with `DriverCapability.HistoryRead`. Drivers without `IHistoryProvider` surface `BadHistoryOperationUnsupported` per node. See `docs/HistoricalDataAccess.md`.
|
||||
`DriverNodeManager.HistoryReadRawModified`, `HistoryReadProcessed`, `HistoryReadAtTime`, and `HistoryReadEvents` route through the driver's `IHistoryProvider` capability with `DriverCapability.HistoryRead`. Drivers without `IHistoryProvider` surface `BadHistoryOperationUnsupported` per node. See `docs/v1/HistoricalDataAccess.md`.
|
||||
|
||||
## Failure isolation
|
||||
|
||||
@@ -60,7 +60,8 @@ Per decision #12, exceptions in the driver's capability call are logged and conv
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — `OnReadValue` / `OnWriteValue` hooks
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — address-space population and alarm routing during discovery
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — push-model `CustomNodeManager2`; `EnsureVariable` / `WriteValue` are the v2 read/write path
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`) that gates Read/Write/Subscribe per the session's resolved LDAP groups
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — `ExecuteAsync` / `ExecuteWriteAsync`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IReadable.cs`, `IWritable.cs`, `WriteIdempotentAttribute.cs`
|
||||
|
||||
+60
-17
@@ -6,21 +6,44 @@ OtOpcUa supports OPC UA **non-transparent** warm/hot redundancy. Two or more `Ot
|
||||
|
||||
> **Discovery surface.** The `ServerArray` path on the `Server` object is what each node populates with self + peer `ApplicationUri`s — see `OpcUaApplicationHost.PopulateServerArray` and the per-node `PeerApplicationUris` option below. The redundancy-object-type `ServerUriArray` proper (a child of `Server.ServerRedundancy`) remains deferred pending an SDK object-type upgrade; clients should read `Server.ServerArray` for peer discovery today.
|
||||
|
||||
> **v2 change.** v1's operator-managed `ClusterNode.RedundancyRole` column + `RedundancyCoordinator` / `ApplyLeaseRegistry` / `PeerHttpProbeLoop` are gone. Primary/secondary is now derived from **Akka cluster role-leader** for the `driver` role. The operator no longer writes a role into the DB; cluster topology + health drive ServiceLevel automatically.
|
||||
> **v2 change.** v1's operator-managed `ClusterNode.RedundancyRole` column + `RedundancyCoordinator` / `ApplyLeaseRegistry` / `PeerHttpProbeLoop` are gone. Primary/secondary is now derived from **Akka cluster role-leader** for the `driver` role. The operator no longer writes a role into the DB; cluster topology (specifically the `driver` role-leader) drives ServiceLevel automatically.
|
||||
|
||||
The runtime pieces live in:
|
||||
|
||||
| Component | Project | Role |
|
||||
|---|---|---|
|
||||
| `ServiceLevelCalculator` | `OtOpcUa.ControlPlane.Redundancy` | Pure function `(NodeHealthInputs) → byte`. No side effects. |
|
||||
| `RedundancyStateActor` | `OtOpcUa.ControlPlane.Redundancy` | Admin-role cluster singleton; subscribes to cluster topology events, debounces 250ms, broadcasts `RedundancyStateChanged` on the `redundancy-state` DPS topic. |
|
||||
| `DbHealthProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; runs `SELECT 1` against ConfigDb every 5s. Read by health endpoint + redundancy calc. |
|
||||
| `PeerOpcUaProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; pings peer `opc.tcp://peer:4840` (real probe call is staged for follow-up F12). |
|
||||
| `OpcUaPublishActor` | `OtOpcUa.Runtime.OpcUa` | Per-driver-node; subscribes to the `redundancy-state` topic, maps the local node's role to a ServiceLevel byte (see below), and forwards it to `IServiceLevelPublisher`. |
|
||||
| `IServiceLevelPublisher` / `SdkServiceLevelPublisher` | `OtOpcUa.Commons.OpcUa` / `OtOpcUa.OpcUaServer` | Writes the byte into the SDK's `Server.ServiceLevel` Variable. Production binds `DeferredServiceLevelPublisher`, which swaps in the real `SdkServiceLevelPublisher` once the SDK is up (it needs `IServerInternal`, available only after `StandardServer.Start`); until then writes route through `NullServiceLevelPublisher`. |
|
||||
| `ServiceLevelCalculator` | `OtOpcUa.ControlPlane.Redundancy` | Pure function `(NodeHealthInputs) → byte` — the fuller DB/probe-aware tiering (see truth table below). Covered by `ServiceLevelCalculatorTests`; **not yet wired into the live driver publish path**, which uses the coarse role mapping in `OpcUaPublishActor`. |
|
||||
| `DbHealthProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; runs `SELECT 1` against ConfigDb every 5s. Read by health endpoint. |
|
||||
| `PeerOpcUaProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; pings peer `opc.tcp://peer:4840` with a TCP connect (2s timeout) and publishes the result on the `redundancy-state` topic. A full secure-channel Hello handshake is a possible future upgrade; the TCP connect is the current real probe. |
|
||||
| `ClusterRoleInfo` | `OtOpcUa.Cluster` | Live view of cluster membership + role-leader; exposes `IClusterRoleInfo` to the rest of the host. |
|
||||
|
||||
## ServiceLevel tiers (Part 5 §6.5)
|
||||
## ServiceLevel tiers
|
||||
|
||||
`ServiceLevelCalculator.Compute(NodeHealthInputs)` returns a byte in 0..255 by tier:
|
||||
### Live driver-side mapping (current)
|
||||
|
||||
`OpcUaPublishActor.HandleRedundancyStateChanged` maps the local node's role
|
||||
(from the `RedundancyStateChanged` snapshot) to a ServiceLevel byte and forwards
|
||||
it through `IServiceLevelPublisher` to the SDK's `Server.ServiceLevel` Variable:
|
||||
|
||||
| Local role | Byte |
|
||||
|---|---|
|
||||
| `Primary` and `driver` role-leader | 240 |
|
||||
| `Primary` (not role-leader) | 200 |
|
||||
| `Secondary` | 100 |
|
||||
| `Detached` (no `driver` role) | 0 |
|
||||
|
||||
Roles come from `RedundancyStateActor.BuildSnapshot`: a node with the `driver`
|
||||
role is `Primary` when it holds the `driver` role-leader lease, otherwise
|
||||
`Secondary`; a node without the `driver` role is `Detached`.
|
||||
|
||||
### Full health-aware tiering (`ServiceLevelCalculator`)
|
||||
|
||||
`ServiceLevelCalculator.Compute(NodeHealthInputs)` is the fuller, DB/probe-aware
|
||||
calculation. It is unit-tested but **not yet on the live publish path** — the
|
||||
driver-side mapping above is what actually drives the SDK today.
|
||||
|
||||
| Tier | Byte | Condition |
|
||||
|---|---|---|
|
||||
@@ -28,16 +51,16 @@ The runtime pieces live in:
|
||||
| Critically degraded | 100 | ConfigDb unreachable AND data is stale. |
|
||||
| Stale | 200 | Data stale but ConfigDb reachable. |
|
||||
| Healthy follower | 240 | DB ok + OPC UA probe ok + not stale. |
|
||||
| Healthy leader | 250 | Healthy + this node is the `driver` role-leader. |
|
||||
| Healthy leader | 250 | Healthy follower (240) + a `+10` bonus when this node is the `driver` role-leader. |
|
||||
|
||||
Drivers write their computed byte into the OPC UA `ServiceLevel` Variable on each refresh. Clients with the standard redundancy heuristic ("pick the highest ServiceLevel") therefore prefer the role-leader and fall back to followers on its degradation.
|
||||
Either way, clients with the standard redundancy heuristic ("pick the highest
|
||||
ServiceLevel") prefer the `driver` role-leader and fall back to followers on its
|
||||
degradation.
|
||||
|
||||
## Data flow
|
||||
|
||||
```
|
||||
Cluster topology event ──┐
|
||||
DB health probe ─────────┤
|
||||
OPC UA peer probe ───────┤
|
||||
▼
|
||||
RedundancyStateActor (admin singleton)
|
||||
│ debounce 250ms
|
||||
@@ -46,14 +69,22 @@ OPC UA peer probe ───────┤
|
||||
│
|
||||
▼
|
||||
Driver nodes' OpcUaPublishActor
|
||||
│ role → byte (240/200/100/0)
|
||||
▼
|
||||
IServiceLevelPublisher (SdkServiceLevelPublisher)
|
||||
│
|
||||
▼
|
||||
ServiceLevelCalculator → byte
|
||||
│
|
||||
▼
|
||||
OPC UA ServiceLevel Variable
|
||||
OPC UA Server.ServiceLevel Variable
|
||||
```
|
||||
|
||||
Today only cluster topology drives the published ServiceLevel.
|
||||
`PeerOpcUaProbeActor` and `DbHealthProbeActor` also run per-node — the peer probe
|
||||
publishes `OpcUaProbeResult` onto the `redundancy-state` topic and the DB probe
|
||||
backs the health endpoint — but their outputs are not yet consumed by
|
||||
`RedundancyStateActor` or folded into the published byte. They are the inputs the
|
||||
fuller `ServiceLevelCalculator` truth table is designed to use once that path goes
|
||||
live.
|
||||
|
||||
The admin singleton is the cluster's only `RedundancyStateActor`. If the admin leader fails over, the new admin node spins up its replacement, re-subscribes to cluster events, and publishes a fresh snapshot from the current `Cluster.State`. There is no DB-persisted state to recover.
|
||||
|
||||
## Configuration
|
||||
@@ -78,15 +109,17 @@ OTOPCUA_ROLES=admin,driver
|
||||
|
||||
Both nodes share the same `ConfigDb` connection string; `Cluster.PublicHostname` + `Roles` are what makes them distinct in cluster gossip. The first node bootstraps the cluster (its address goes in `SeedNodes`); the second node joins via the same `SeedNodes` list.
|
||||
|
||||
There is no longer a `Node:NodeId` setting, no `ClusterNode.RedundancyRole`, no `ServiceLevelBase`. NodeId is derived as `host:port` of the cluster `PublicHostname` (see `ClusterRoleInfo.LocalNode` for the formula).
|
||||
There is no longer a `Node:NodeId` setting and no `ClusterNode.RedundancyRole` column (the V2 migration dropped it — primary/secondary is now derived from cluster role-leadership). NodeId is derived as `host:port` of the cluster `PublicHostname` (see `ClusterRoleInfo.LocalNode` for the formula).
|
||||
|
||||
The `ClusterNode.ServiceLevelBase` column still exists and is editable in the Admin UI (NodeEdit / Cluster Redundancy pages), but it no longer drives the runtime ServiceLevel — that value is computed from cluster role/health and published per the mapping above, independent of this stored preference.
|
||||
|
||||
### Peer URI advertising
|
||||
|
||||
Each node advertises its partner via `OpcUaApplicationHostOptions.PeerApplicationUris` (an `IList<string>`, default empty). `OpcUaApplicationHost.PopulateServerArray` appends each configured peer URI to the SDK's `IServerInternal.ServerUris` string table after server startup, so that `Server.ServerArray` reads served by `OnReadServerArray` return both self + peers. Set this per-node in `appsettings.json`:
|
||||
Each node advertises its partner via `OpcUaApplicationHostOptions.PeerApplicationUris` (an `IList<string>`, default empty). `OpcUaApplicationHost.PopulateServerArray` appends each configured peer URI to the SDK's `IServerInternal.ServerUris` string table after server startup, so that `Server.ServerArray` reads served by `OnReadServerArray` return both self + peers. The options bind from the `OpcUa` config section (see `Program.cs` — `AddValidatedOptions<OpcUaApplicationHostOptions>(…, "OpcUa")`). Set this per-node in `appsettings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"OpcUaServer": {
|
||||
"OpcUa": {
|
||||
"PeerApplicationUris": ["urn:node-b:OtOpcUa"]
|
||||
}
|
||||
}
|
||||
@@ -104,6 +137,16 @@ There is no operator-driven role swap during a partition. Failover is what the c
|
||||
|
||||
The OtOpcUa Client CLI at `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` supports `-F` / `--failover-urls` for automatic client-side failover; for long-running subscriptions the CLI monitors session KeepAlive and reconnects to the next available server, recreating the subscription on the new endpoint. See [`Client.CLI.md`](Client.CLI.md).
|
||||
|
||||
## Observability
|
||||
|
||||
`OpcUaPublishActor` emits one metric on every ServiceLevel transition (it suppresses no-op repeats of the same byte):
|
||||
|
||||
| Metric | Type | Notes |
|
||||
|---|---|---|
|
||||
| `otopcua.redundancy.service_level_change` | Counter (`{change}`) | OPC UA `Server.ServiceLevel` transitions emitted by the redundancy state. Tagged with `level` = the new byte. |
|
||||
|
||||
The meter is defined on `OtOpcUaTelemetry` (`src/Core/ZB.MOM.WW.OtOpcUa.Commons/Observability/OtOpcUaTelemetry.cs`); it surfaces through whatever OpenTelemetry exporter the host configures.
|
||||
|
||||
## Depth reference
|
||||
|
||||
For the full design — message contracts, tiered calculator truth table, recovery semantics — see `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §6.
|
||||
|
||||
+17
-16
@@ -52,7 +52,7 @@ is refreshed, and they are eventually *released* — but never silently deleted.
|
||||
| `ClusterId` | The first cluster to publish the reservation. |
|
||||
| `FirstPublishedAt` / `FirstPublishedBy` | When and by whom the claim was first made. |
|
||||
| `LastPublishedAt` | Refreshed on every subsequent publish that re-asserts the same `(Kind, Value, EquipmentUuid)`. |
|
||||
| `ReleasedAt` / `ReleasedBy` / `ReleaseReason` | Non-null once a FleetAdmin explicitly releases the claim. A row with `ReleasedAt IS NULL` is *active*. |
|
||||
| `ReleasedAt` / `ReleasedBy` / `ReleaseReason` | Non-null once an Administrator explicitly releases the claim. `ReleasedBy` is the LDAP operator name (passed explicitly as `@ReleasedBy`; not `SUSER_SNAME()`). A row with `ReleasedAt IS NULL` is *active*. |
|
||||
|
||||
There is no foreign key from `EquipmentUuid` / `ClusterId` to their tables — by
|
||||
design, so a reservation survives the deletion or disabling of the equipment
|
||||
@@ -99,14 +99,16 @@ being disabled, the generation being superseded, or a rollback.
|
||||
|
||||
### 4. Release
|
||||
|
||||
Reusing an identifier for a **different** piece of equipment requires a
|
||||
FleetAdmin to explicitly release the existing claim. Release runs
|
||||
Reusing an identifier for a **different** piece of equipment requires an
|
||||
Administrator to explicitly release the existing claim. Release runs
|
||||
`sp_ReleaseExternalIdReservation`, which:
|
||||
|
||||
- Requires a non-empty **reason** — a hard audit invariant; the procedure
|
||||
raises an error without one.
|
||||
- Stamps `ReleasedAt`, `ReleasedBy` (`SUSER_SNAME()`), and `ReleaseReason`
|
||||
rather than deleting the row, so the history is preserved.
|
||||
- Requires a non-empty **`@ReleasedBy`** — the LDAP operator name supplied
|
||||
by the caller; the procedure raises an error without it.
|
||||
- Stamps `ReleasedAt`, `ReleasedBy` (the supplied operator name), and
|
||||
`ReleaseReason` rather than deleting the row, so the history is preserved.
|
||||
- Once released, the `(Kind, Value)` pair is free — a different
|
||||
`EquipmentUuid` can claim it on a future publish.
|
||||
|
||||
@@ -116,20 +118,19 @@ permanent for the life of the asset.
|
||||
|
||||
## The Admin page
|
||||
|
||||
`/reservations` (Admin UI) is the operator surface. It is **FleetAdmin-only**
|
||||
(the `CanPublish` policy).
|
||||
`/reservations` (Admin UI) is the operator surface. It requires authentication
|
||||
(`[Authorize]`) but is not restricted to a specific Admin UI role — any signed-in
|
||||
user can view it.
|
||||
|
||||
- **Active** table — every reservation with `ReleasedAt IS NULL`: kind, value,
|
||||
owning `EquipmentUuid`, cluster, and the first/last publish stamps. Each row
|
||||
has a **Release…** action.
|
||||
- **Released** table — the 100 most recently released reservations, with the
|
||||
releasing user and reason.
|
||||
- **Release dialog** — opened from an active row; it requires a reason before
|
||||
the Release button will submit, mirroring the procedure's audit invariant.
|
||||
The page is a **read-only flat list** of all `ExternalIdReservation` rows,
|
||||
ordered by Kind then Value. It shows Kind, Value, owning `EquipmentUuid`, and
|
||||
Cluster. There is no Active/Released split, no Release action, and no Release
|
||||
dialog on this page.
|
||||
|
||||
You cannot *create* a reservation from this page — reservations only ever come
|
||||
into existence as a side-effect of publishing a generation. The page is for
|
||||
inspection and for the release flow.
|
||||
into existence as a side-effect of publishing a generation. The release flow
|
||||
is described in `docs/v2/admin-ui.md` § "Release an external-ID reservation"
|
||||
and runs via `sp_ReleaseExternalIdReservation`.
|
||||
|
||||
## Related
|
||||
|
||||
|
||||
+16
-13
@@ -6,7 +6,7 @@ This file covers the engine internals — predicate evaluation, state machine, p
|
||||
|
||||
## Definition shape
|
||||
|
||||
`ScriptedAlarmDefinition` (`src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/ScriptedAlarmDefinition.cs`) is the runtime contract the engine consumes. The generation-publish path materialises these from the `ScriptedAlarm` + `Script` config tables via `Phase7EngineComposer.ProjectScriptedAlarms`.
|
||||
`ScriptedAlarmDefinition` (`src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/ScriptedAlarmDefinition.cs`) is the runtime contract the engine consumes. The generation-publish path materialises these from the `ScriptedAlarm` + `Script` config tables via `Phase7Composer.Compose` + the driver-role host actor startup path.
|
||||
|
||||
| Field | Notes |
|
||||
|---|---|
|
||||
@@ -14,7 +14,7 @@ This file covers the engine internals — predicate evaluation, state machine, p
|
||||
| `EquipmentPath` | UNS path the alarm hangs under in the address space. ACL scope inherits from the equipment node. |
|
||||
| `AlarmName` | Browse-tree display name. |
|
||||
| `Kind` | `AlarmKind` — `AlarmCondition`, `LimitAlarm`, `DiscreteAlarm`, or `OffNormalAlarm`. Controls only the OPC UA ObjectType the node surfaces as; the internal state machine is identical for all four. |
|
||||
| `Severity` | `AlarmSeverity` enum (`Low` / `Medium` / `High` / `Critical`). Static per decision #13 — the predicate does not compute severity. The DB column is an OPC UA Part 9 1..1000 integer; `Phase7EngineComposer.MapSeverity` bands it into the four-value enum. |
|
||||
| `Severity` | `AlarmSeverity` enum (`Low` / `Medium` / `High` / `Critical`), defined in `Core.Abstractions/IAlarmSource.cs`. Static per decision #13 — the predicate does not compute severity. The publish path bands the configured value into this four-value enum before materialising the `ScriptedAlarmDefinition`. |
|
||||
| `MessageTemplate` | String with `{TagPath}` placeholders, resolved at emission time. See below. |
|
||||
| `PredicateScriptSource` | Roslyn C# script returning `bool`. `true` = condition active; `false` = cleared. |
|
||||
| `HistorizeToAveva` | When true, every emission is enqueued to `IAlarmHistorianSink`. Default true. Galaxy-native alarms default false since Galaxy historises them directly. |
|
||||
@@ -92,7 +92,7 @@ Predicate evaluation and message-template resolution deliberately treat tag-inpu
|
||||
|
||||
## State persistence
|
||||
|
||||
`IAlarmStateStore` (`IAlarmStateStore.cs`) is the persistence contract: `LoadAsync(alarmId)`, `LoadAllAsync`, `SaveAsync(state)`, `RemoveAsync(alarmId)`. `InMemoryAlarmStateStore` in the same file is the default for tests and dev deployments without a SQL backend. Stream E wires the production implementation against the `ScriptedAlarmState` config-DB table with audit logging through `Core.Abstractions.IAuditLogger`.
|
||||
`IAlarmStateStore` (`IAlarmStateStore.cs`) is the persistence contract: `LoadAsync(alarmId)`, `LoadAllAsync`, `SaveAsync(state)`, `RemoveAsync(alarmId)`. `InMemoryAlarmStateStore` in the same file is the default for tests and dev deployments without a SQL backend. The production implementation is `EfAlarmActorStateStore` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/EfAlarmActorStateStore.cs`), an `IAlarmActorStateStore` that persists to the `ScriptedAlarmState` config-DB table.
|
||||
|
||||
Persisted scope per plan decision #14: `Enabled`, `Acked`, `Confirmed`, `Shelving`, `LastTransitionUtc`, the `LastAck*` / `LastConfirm*` audit fields, and the append-only `Comments` list. `Active` is **not** trusted across restart — the engine re-runs the predicate at `LoadAsync` so operators never re-ack an alarm that was already acknowledged before an outage, and alarms whose condition cleared during downtime settle to `Inactive` without a spurious clear-event.
|
||||
|
||||
@@ -111,15 +111,17 @@ Emissions map into `AlarmEventArgs` as `AlarmType = Kind.ToString()`, `SourceNod
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) is the single call site that instantiates the engine. It takes the generation's `Script` / `VirtualTag` / `ScriptedAlarm` rows, the shared upstream-tag source, an `IAlarmStateStore`, and an `IAlarmHistorianSink`, and returns the composed sources the caller owns. When `scriptedAlarms.Count > 0`:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) is a pure data composer; it has no knowledge of `ScriptedAlarmEngine`. It maps `ScriptedAlarm` config-DB rows into `ScriptedAlarmPlan` records that the driver-role host actor startup path consumes.
|
||||
|
||||
1. `ProjectScriptedAlarms` resolves each row's `PredicateScriptId` against the script dictionary and produces a `ScriptedAlarmDefinition` list. Unknown or disabled scripts throw immediately — the DB publish guarantees referential integrity but this is a belt-and-braces check.
|
||||
2. A `ScriptedAlarmEngine` is constructed with the upstream source, the store, a shared `ScriptLoggerFactory` keyed to `scripts-*.log`, and the root Serilog logger.
|
||||
3. `alarmEngine.OnEvent` is wired to `RouteToHistorianAsync`, which projects each emission into an `AlarmHistorianEvent` and enqueues it on the sink. Fire-and-forget — the SQLite store-and-forward sink is already non-blocking.
|
||||
4. `LoadAsync(alarmDefs)` runs synchronously on the startup thread: it compiles every predicate, subscribes to the union of predicate inputs and message-template tokens, seeds the value cache, loads persisted state, re-derives `ActiveState` from a fresh predicate evaluation, and starts the 5s shelving timer. Compile failures are aggregated into one `InvalidOperationException` so operators see every bad predicate in one startup log line rather than one at a time.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream; the v2 `ScriptedAlarmActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`) owns the active-state surface for OPC UA variable reads on the alarm's active-state node (task #245) — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
In the v2 actor system, scripted-alarm engine composition is owned by the driver-role host:
|
||||
|
||||
Both engine and source are added to `Phase7ComposedSources.Disposables`, which `Phase7Composer` disposes on server shutdown.
|
||||
1. The host reads the generation's `ScriptedAlarm` + `Script` rows and resolves each row's `PredicateScriptId` to produce a `ScriptedAlarmDefinition` list. Unknown or disabled scripts fail fast — the DB publish guarantees referential integrity but this is a belt-and-braces check.
|
||||
2. A `ScriptedAlarmEngine` is constructed with the upstream-tag source, an `IAlarmStateStore` (production: `EfAlarmActorStateStore`), a shared `ScriptLoggerFactory` keyed to `scripts-*.log`, and the root Serilog logger.
|
||||
3. `alarmEngine.OnEvent` is wired to the historian sink. Fire-and-forget — the SQLite store-and-forward sink is already non-blocking.
|
||||
4. `LoadAsync(alarmDefs)` runs on startup: it compiles every predicate, subscribes to the union of predicate inputs and message-template tokens, seeds the value cache, loads persisted state, re-derives `ActiveState` from a fresh predicate evaluation, and starts the 5s shelving timer. Compile failures are aggregated into one `InvalidOperationException` so operators see every bad predicate in one startup log line rather than one at a time.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream. The v2 `ScriptedAlarmActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`) owns the active-state surface for OPC UA variable reads on the alarm's condition-state node — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
|
||||
Both engine and source are disposed on server shutdown via the driver-role host teardown path.
|
||||
|
||||
## Key source files
|
||||
|
||||
@@ -129,10 +131,11 @@ Both engine and source are added to `Phase7ComposedSources.Disposables`, which `
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/Part9StateMachine.cs` — pure-function state machine + `TransitionResult` / `EmissionKind`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmConditionState.cs` — persisted state record + `AlarmComment` audit entry + `ShelvingState`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmPredicateContext.cs` — script-side `ScriptContext` (read-only, write rejected)
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmTypes.cs` — `AlarmKind` + the four Part 9 enums
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmTypes.cs` — `AlarmKind` + `ShelvingKind` + four Part 9 state enums (`AlarmEnabledState`, `AlarmActiveState`, `AlarmAckedState`, `AlarmConfirmedState`); `AlarmSeverity` (`Low`/`Medium`/`High`/`Critical`) lives in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IAlarmSource.cs`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/MessageTemplate.cs` — `{path}` placeholder resolver
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/IAlarmStateStore.cs` — persistence contract + `InMemoryAlarmStateStore` default
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — composition, config-row projection, historian routing
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — pure data composer: config-DB entities → `Phase7CompositionResult` (UNS topology + driver/alarm plans)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed Phase 7 plan into the SDK node manager
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs` — actor wrapper owning the alarm state machine and exposing `ActiveState` for OPC UA variable reads
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs` — actor that owns the per-alarm state machine; publishes `AlarmTransitionEvent` on the cluster `alerts` DPS topic
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/EfAlarmActorStateStore.cs` — production `IAlarmActorStateStore` backed by the `ScriptedAlarmState` config-DB table
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynScriptedAlarmEvaluator.cs` — production Roslyn predicate evaluator
|
||||
|
||||
@@ -7,7 +7,7 @@ A production OtOpcUa deployment runs **one binary per node**, plus the optional
|
||||
| Process | Project | Runtime | Platform | Responsibility |
|
||||
|---|---|---|---|---|
|
||||
| **OtOpcUa Host** | `src/Server/ZB.MOM.WW.OtOpcUa.Host` | .NET 10 | AnyCPU | Single fused binary. `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + auth + control-plane singletons), `driver` (OPC UA endpoint + per-driver actors), or both. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x86 (32-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true`. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x64 (64-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true`. |
|
||||
|
||||
Galaxy access still uses the separately-installed **mxaccessgw** sidecar (see `docs/v2/Galaxy.ParityRig.md`); the gateway owns the MXAccess COM bitness constraint (its worker is x86 net48). Nothing in the OtOpcUa repo carries that constraint anymore.
|
||||
|
||||
@@ -66,15 +66,15 @@ Both admin and driver nodes expose:
|
||||
| `/health/ready` | ConfigDb reachable + cluster member state is `Up`. |
|
||||
| `/health/active` | Admin-role leader (the node Traefik or an HA LB should route traffic to). |
|
||||
|
||||
Used by Traefik for the active-leader-only routing pattern (see [Task 63 traefik docs](v2/Architecture-v2.md) — TODO).
|
||||
Used by Traefik for the active-leader-only routing pattern (see [Architecture-v2.md](v2/Architecture-v2.md)).
|
||||
|
||||
## OtOpcUa Wonderware Historian (optional)
|
||||
|
||||
Unchanged from v1. Pipe IPC contract lives in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/Contracts/`; sidecar pipe handler in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Pipe/`. Install via `scripts/install/Install-Services.ps1 -InstallWonderwareHistorian`.
|
||||
Unchanged from v1. IPC contract types live in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/`; sidecar pipe handler in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/`. Install via `scripts/install/Install-Services.ps1 -InstallWonderwareHistorian`.
|
||||
|
||||
## Install / Uninstall
|
||||
|
||||
- `scripts/install/Install-Services.ps1 -Roles admin,driver` — installs `OtOpcUaHost`. v2 rewrite tracked as plan Task 62.
|
||||
- `scripts/install/Install-Services.ps1 -Roles admin,driver` — installs `OtOpcUaHost`.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes the host service (and the historian sidecar if installed).
|
||||
|
||||
## Logging
|
||||
|
||||
+30
-28
@@ -1,8 +1,8 @@
|
||||
# Virtual Tags
|
||||
|
||||
Virtual tags are OPC UA variable nodes whose values are computed by operator-authored C# scripts against other tags (driver or virtual). They live in the Equipment browse tree alongside driver-sourced variables: a client browsing `Enterprise/Site/Area/Line/Equipment/` sees one flat child list that mixes both kinds, and a read / subscribe on a virtual node looks identical to one on a driver node from the wire. The separation is server-side — `NodeScopeResolver` tags each variable's `NodeSource` (`Driver` / `Virtual` / `ScriptedAlarm`), and `DriverNodeManager` dispatches reads to different backends accordingly. See [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) for the dispatch decision.
|
||||
Virtual tags are OPC UA variable nodes whose values are computed by operator-authored C# scripts against other tags (driver or virtual). They live in the Equipment browse tree alongside driver-sourced variables: a client browsing `Enterprise/Site/Area/Line/Equipment/` sees one flat child list that mixes both kinds, and a read / subscribe on a virtual node looks identical to one on a driver node on the wire. The separation is server-side — `EquipmentNodeWalker` stamps each `DriverAttributeInfo` with `NodeSourceKind` (`Driver` / `Virtual` / `ScriptedAlarm`) at address-space build time, and `GenericDriverNodeManager` routes reads to different backends accordingly. See [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) for the dispatch decision.
|
||||
|
||||
The runtime is split across two projects: `Core.Scripting` holds the Roslyn sandbox + evaluator primitives that are reused by both virtual tags and scripted alarms; `Core.VirtualTags` holds the engine that owns the dependency graph, the evaluation pipeline, and the `ISubscribable` adapter the server dispatches to.
|
||||
The runtime is split across two projects: `Core.Scripting` holds the Roslyn sandbox + evaluator primitives that are reused by both virtual tags and scripted alarms; `Core.VirtualTags` holds the engine that owns the dependency graph, the evaluation pipeline, and the `ISubscribable` adapter the server dispatches to. In the v2 actor system, `VirtualTagActor` + `DependencyMuxActor` (in `Core.Runtime`) own the per-instance state and upstream-feed wiring; `RoslynVirtualTagEvaluator` (in `Host.Engines`) is the production `IVirtualTagEvaluator` binding.
|
||||
|
||||
## Roslyn script sandbox (`Core.Scripting`)
|
||||
|
||||
@@ -10,15 +10,19 @@ User scripts are compiled via `Microsoft.CodeAnalysis.CSharp` (regular compiler,
|
||||
|
||||
### Compile pipeline (`ScriptEvaluator<TContext, TResult>`)
|
||||
|
||||
`ScriptEvaluator.Compile(source)` is a three-step gate:
|
||||
`ScriptEvaluator.Compile(source)` is a five-step gate:
|
||||
|
||||
1. **Roslyn compile** against `ScriptSandbox.Build(contextType)`. Throws `CompilationErrorException` on syntax / type errors.
|
||||
2. **`ForbiddenTypeAnalyzer.Analyze`** walks the syntax tree post-compile and resolves every referenced symbol against the deny-list. Throws `ScriptSandboxViolationException` with every offending source span attached. This is defence-in-depth: `ScriptOptions` alone cannot block every BCL namespace because .NET type forwarding routes types through assemblies the allow-list does permit.
|
||||
3. **Delegate materialization** — `script.CreateDelegate()`. Failures here are Roslyn-internal; user scripts don't reach this step.
|
||||
1. **Injection guard** — `EnforceSingleRunMember` parses the synthesized wrapper and rejects sources whose brace structure would inject sibling methods or type declarations alongside the `CompiledScript.Run` wrapper method. Throws `CompilationErrorException` with diagnostic id `LMX001`/`LMX002` (Core.Scripting-013).
|
||||
2. **Roslyn compile** against `ScriptSandbox.Build(contextType)`. Throws `CompilationErrorException` on syntax / type errors.
|
||||
3. **`ForbiddenTypeAnalyzer.Analyze`** walks the syntax tree post-compile and resolves every referenced symbol against the deny-list. Throws `ScriptSandboxViolationException` with every offending source span attached. This is defence-in-depth: `ScriptOptions` alone cannot block every BCL namespace because .NET type forwarding routes types through assemblies the allow-list does permit.
|
||||
4. **PE emit** — `CSharpCompilation.Emit` writes the assembly to a `MemoryStream`. Failures here are Roslyn-internal; errors in user scripts are caught by the earlier steps and do not surface here.
|
||||
5. **ALC load + delegate bind** — loads the emitted assembly into a collectible `ScriptAssemblyLoadContext` and binds a typed `Func<ScriptGlobals<TContext>, TResult>` delegate to the `CompiledScript.Run` method.
|
||||
|
||||
`ScriptSandbox.Build` allow-lists exactly: `System.Private.CoreLib` (primitives + `Math` + `Convert`), `System.Linq`, `Core.Abstractions` (for `DataValueSnapshot` / `DriverDataType`), `Core.Scripting` (for `ScriptContext` + `Deadband`), `Serilog` (for `ILogger`), and the concrete context type's assembly. Pre-imported namespaces: `System`, `System.Linq`, `ZB.MOM.WW.OtOpcUa.Core.Abstractions`, `ZB.MOM.WW.OtOpcUa.Core.Scripting`.
|
||||
`ScriptSandbox.Build` constructs the compile reference set in two parts. First, four pinned assemblies are always included: `Core.Abstractions` (for `DataValueSnapshot` / `DriverDataType`), `Core.Scripting` (for `ScriptContext` + `Deadband`), `Serilog` (for `ILogger`), and the concrete context type's assembly. Second, the BCL subset is enumerated from the runtime's `TRUSTED_PLATFORM_ASSEMBLIES` list, restricted to filenames starting with `System.*` plus `netstandard.dll`, `mscorlib.dll`, and `Microsoft.Win32.Registry.dll` (the last needed so `ForbiddenTypeAnalyzer` can resolve and reject registry types). Pre-imported namespaces: `System`, `System.Linq`, `ZB.MOM.WW.OtOpcUa.Core.Abstractions`, `ZB.MOM.WW.OtOpcUa.Core.Scripting`.
|
||||
|
||||
`ForbiddenTypeAnalyzer.ForbiddenNamespacePrefixes` currently denies `System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Thread`, `System.Threading.Tasks`, `System.Runtime.InteropServices`, `Microsoft.Win32`. Matching is by prefix against the resolved symbol's containing namespace, so `System.Net` catches `System.Net.Http.HttpClient` and every subnamespace. `System.Threading.Tasks` is denied because scripts are synchronous predicates with no legitimate need to start background tasks — a `Task.Run` fan-out would outlive the per-evaluation timeout entirely (Core.Scripting-003). `System.Environment`, `System.AppDomain`, `System.GC`, and `System.Activator` are denied type-granularly via `ForbiddenFullTypeNames` because they live directly in the `System` namespace (which is otherwise allowed for primitives) — `Environment.Exit` / `FailFast` terminate the host process outright (Core.Scripting-001).
|
||||
`ForbiddenTypeAnalyzer.ForbiddenNamespacePrefixes` denies `System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Tasks`, `System.Runtime.InteropServices`, `System.Runtime.Loader`, and `Microsoft.Win32`. Matching is by prefix against the resolved symbol's containing namespace, so `System.Net` catches `System.Net.Http.HttpClient` and every subnamespace. `System.Threading.Tasks` is denied because scripts are synchronous predicates with no legitimate need to start background tasks — a `Task.Run` fan-out would outlive the per-evaluation timeout entirely (Core.Scripting-003). `System.Runtime.Loader` is denied to block `AssemblyLoadContext` / `AssemblyDependencyResolver` — arbitrary DLL loads into the host process (Core.Scripting-012).
|
||||
|
||||
`ForbiddenTypeAnalyzer.ForbiddenFullTypeNames` denies type-granularly: `System.Environment`, `System.AppDomain`, `System.GC`, `System.Activator`, `System.Threading.Thread`, `System.Threading.ThreadPool`, and `System.Threading.Timer`. These types require granular denial rather than namespace-prefix denial for different reasons: `Environment` / `AppDomain` / `GC` / `Activator` live directly in the `System` namespace (which is otherwise allowed for primitives), so a namespace-prefix rule cannot reach them without blocking `int` / `string` / `Math`; `Thread` / `ThreadPool` / `Timer` live in `System.Threading` (shared with allowed types like `CancellationToken` and `SemaphoreSlim`), so a prefix on `System.Threading` would block those too. `Environment.Exit` / `FailFast` terminate the host process outright (Core.Scripting-001); `Thread` and `ThreadPool` would reintroduce the background-fanout vectors that the `System.Threading.Tasks` denial closed (Core.Scripting-010 / -012).
|
||||
|
||||
#### Known resource limits (accepted trade-offs)
|
||||
|
||||
@@ -94,35 +98,33 @@ Fire-and-forget sink for evaluation results when `VirtualTagDefinition.Historize
|
||||
|
||||
## Dispatch integration
|
||||
|
||||
Per [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) Option B, there is a single `DriverNodeManager`. `VirtualTagSource` implements `IReadable` + `ISubscribable` over a `VirtualTagEngine`:
|
||||
Per [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) Option B, there is a single `GenericDriverNodeManager`. `VirtualTagSource` implements `IReadable` + `ISubscribable` over a `VirtualTagEngine`:
|
||||
|
||||
- `ReadAsync` fans each path through `engine.Read(...)`.
|
||||
- `SubscribeAsync` calls `engine.Subscribe` per path and forwards each engine observer callback as an `OnDataChange` event; emits an initial-data callback per OPC UA convention.
|
||||
- `UnsubscribeAsync` disposes every per-path engine subscription it holds.
|
||||
- **`IWritable` is deliberately not implemented.** `DriverNodeManager.IsWriteAllowedBySource` rejects OPC UA client writes to virtual nodes with `BadUserAccessDenied` before any dispatch — scripts are the only write path via `ctx.SetVirtualTag`.
|
||||
- **`IWritable` is deliberately not implemented.** Virtual-tag nodes are not client-writable because `OtOpcUaNodeManager.EnsureVariable` materialises every SDK variable with `AccessLevel = AccessLevels.CurrentRead`; the SDK base `CustomNodeManager2.Write` returns `BadNotWritable` for read-only nodes and v2 has no client-write dispatch path. Scripts are the only write path via `ctx.SetVirtualTag`.
|
||||
|
||||
`DriverNodeManager.SelectReadable(source, ...)` picks the `IReadable` based on `NodeSourceKind`. See [ReadWriteOperations.md](ReadWriteOperations.md) and [Subscriptions.md](Subscriptions.md) for the broader dispatch framing.
|
||||
`NodeSourceKind` on each `DriverAttributeInfo` (set by `EquipmentNodeWalker` at address-space build time) drives which backend handles a read. See [ReadWriteOperations.md](ReadWriteOperations.md) and [v1/Subscriptions.md](v1/Subscriptions.md) for the broader dispatch framing.
|
||||
|
||||
## Upstream reads + history
|
||||
|
||||
`ITagUpstreamSource` and `IHistoryWriter` are the two ports the engine requires from its host. Both live in `Core.VirtualTags`. In the Server process:
|
||||
`ITagUpstreamSource` and `IHistoryWriter` are the two ports the engine requires from its host. Both live in `Core.VirtualTags`. In the v2 actor system:
|
||||
|
||||
- **Upstream-tag feed.** In v2 the upstream-tag feed is provided by the actor system. `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) multiplexes driver `ISubscribable` subscriptions for every fullRef the script graph references, translating driver-opaque fullRefs back to UNS paths via a reverse map. Deltas land on `VirtualTagActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs`) as `DependencyValueChanged` messages; the actor's in-memory cache serves the engine's synchronous `GetTag` reads. Reads of never-pushed paths return `BadNodeIdUnknown` quality (`UpstreamNotConfigured = 0x80340000`).
|
||||
- **`IHistoryWriter`** — no production implementation is currently wired for virtual tags; `VirtualTagEngine` gets `NullHistoryWriter` by default from `Phase7EngineComposer`.
|
||||
- **Upstream-tag feed.** `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) routes `DriverInstanceActor.AttributeValuePublished` events to the `VirtualTagActor` instances that declared interest in those tag refs. Each `VirtualTagActor` holds the in-memory per-tag dependency map; the `IVirtualTagEvaluator` (`RoslynVirtualTagEvaluator`) receives the dependency snapshot synchronously on the actor message thread. Reads of never-pushed dependency refs return `null` values in the dependency snapshot.
|
||||
- **`IHistoryWriter`** — no production implementation is wired for virtual tags; `VirtualTagEngine` receives `NullHistoryWriter` by default.
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) projects the published generation into a `Phase7Plan` that `Phase7Applier` applies to the running SDK node manager:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) is a pure static function that flattens config-DB entities into a `Phase7CompositionResult` value (UNS topology + driver-instance plans + scripted-alarm plans). `Phase7Applier` applies that result into the OPC UA SDK node manager. Neither class has knowledge of `VirtualTagEngine` or `ScriptedAlarmEngine`.
|
||||
|
||||
1. `PrepareAsync(generationId, ct)` — called after the bootstrap generation loads and before `OpcUaApplicationHost.StartAsync`. Reads the `Script` / `VirtualTag` / `ScriptedAlarm` rows for that generation from the config DB (`OtOpcUaConfigDbContext`). Empty-config fast path returns `Phase7ComposedSources.Empty`.
|
||||
2. Constructs a `CachedTagUpstreamSource` + hands it to `Phase7EngineComposer.Compose`.
|
||||
3. `Phase7EngineComposer.Compose` projects `VirtualTag` rows into `VirtualTagDefinition`s (joining `Script` rows by `ScriptId`), instantiates `VirtualTagEngine`, calls `Load`, wraps in `VirtualTagSource`.
|
||||
4. Builds a `DriverFeed` per driver by mapping the driver's `EquipmentNamespaceContent` to `UNS path → driver fullRef` (path format `/{area}/{line}/{equipment}/{tag}` matching the `EquipmentNodeWalker` browse tree so script literals match the operator-visible UNS), then starts `DriverSubscriptionBridge`.
|
||||
5. Returns `Phase7ComposedSources` with the `VirtualTagSource` cast as `IReadable`. `OpcUaServerService` passes it to `OpcUaApplicationHost` which threads it into `DriverNodeManager` as `virtualReadable`.
|
||||
In the v2 actor system, virtual-tag engine composition is owned by the driver-role host actor tree:
|
||||
|
||||
`DisposeAsync` tears down the bridge first (no more events into the cache), then the engines (cascades + timer ticks stop), then the owned SQLite historian sink if any.
|
||||
- `Phase7Composer.Compose` emits `DriverInstancePlan` / `ScriptedAlarmPlan` records; the driver-role `DriverHostActor` spawns one `VirtualTagActor` per virtual-tag expression and one `ScriptedAlarmActor` per scripted alarm.
|
||||
- `RoslynVirtualTagEvaluator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynVirtualTagEvaluator.cs`) is injected into each `VirtualTagActor` as its `IVirtualTagEvaluator`. It holds a per-source `CompiledScriptCache` keyed by script source and compiles on first use.
|
||||
- `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) receives every `DriverInstanceActor.AttributeValuePublished` event and routes it to the `VirtualTagActor` instances that registered interest in that tag ref.
|
||||
|
||||
Definition reload on config publish: `VirtualTagEngine.Load` is re-entrant — a future config-publish handler can call it with a new definition set. That handler is not yet wired; today engine composition happens once per service start against the bootstrapped generation.
|
||||
`VirtualTagEngine`, `VirtualTagSource`, `TimerTriggerScheduler`, and `ITagUpstreamSource` are available as standalone Core.VirtualTags primitives and remain the correct composition path for non-actor deployments (integration tests, future standalone runtimes).
|
||||
|
||||
## Key source files
|
||||
|
||||
@@ -130,7 +132,7 @@ Definition reload on config publish: `VirtualTagEngine.Load` is re-entrant — a
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ScriptGlobals.cs` — generic globals wrapper naming the field `ctx`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ScriptSandbox.cs` — assembly allow-list + imports
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ForbiddenTypeAnalyzer.cs` — post-compile semantic deny-list
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ScriptEvaluator.cs` — three-step compile pipeline
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ScriptEvaluator.cs` — five-step compile pipeline (injection guard → Roslyn compile → ForbiddenTypeAnalyzer → PE emit → ALC load)
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/TimedScriptEvaluator.cs` — 250ms default timeout wrapper
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/CompiledScriptCache.cs` — SHA-256-keyed compile cache
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/DependencyExtractor.cs` — static `ctx.GetTag` / `ctx.SetVirtualTag` inference
|
||||
@@ -144,9 +146,9 @@ Definition reload on config publish: `VirtualTagEngine.Load` is re-entrant — a
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/ITagUpstreamSource.cs` — driver-tag read + subscribe port
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/IHistoryWriter.cs` — historize sink port + `NullHistoryWriter`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagSource.cs` — `IReadable` + `ISubscribable` adapter
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs` — actor wrapper that owns per-instance state and the synchronous read cache
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs` — driver `ISubscribable` → actor feed (replaces the v1 `DriverSubscriptionBridge`)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynVirtualTagEvaluator.cs` — production Roslyn evaluator wired into the actor
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — row projection + engine instantiation (`Phase7Plan` composer)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs` — actor that receives `DependencyValueChanged` from the mux and invokes `IVirtualTagEvaluator` per expression
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs` — routes `DriverInstanceActor.AttributeValuePublished` to interested `VirtualTagActor` subscribers
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynVirtualTagEvaluator.cs` — production `IVirtualTagEvaluator` binding; holds a per-source `CompiledScriptCache`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — pure data composer: config-DB entities → `Phase7CompositionResult` (UNS topology + driver/alarm plans)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed plan into the SDK node manager
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — driver-vs-virtual dispatch kernel
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — driver-agnostic OPC UA node-manager backbone; per-variable `NodeSourceKind` drives dispatch
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
# AB CIP Driver
|
||||
|
||||
In-process native-protocol driver that exposes Allen-Bradley CIP / EtherNet-IP
|
||||
controllers as OPC UA nodes. It runs inside the OtOpcUa server's .NET 10 AnyCPU
|
||||
process and talks to the PLC through the libplctag.NET wrapper — no gateway, no
|
||||
sidecar. One driver instance can serve many devices; per-device routing is keyed
|
||||
on the canonical `ab://gateway[:port]/cip-path` host-address string.
|
||||
|
||||
Supported families: **ControlLogix**, **CompactLogix**, **Micro800**, and
|
||||
**GuardLogix**. CIP has no native push model, so subscriptions are a polling
|
||||
overlay on top of `IReadable`.
|
||||
|
||||
For the driver spec (capability surface, config shape, type mapping), see
|
||||
[docs/v2/driver-specs.md §3](../v2/driver-specs.md). For the manual test client,
|
||||
see [Driver.AbCip.Cli.md](../Driver.AbCip.Cli.md). For the integration fixture
|
||||
coverage map, see [AbServer-Test-Fixture.md](AbServer-Test-Fixture.md).
|
||||
|
||||
## Project Layout
|
||||
|
||||
| Project | Role |
|
||||
|---------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/` | The driver — `AbCipDriver`, the libplctag runtime/enumerator/template-reader wrappers, the UDT read planner + template decoders, the host-address parser, and the ALMD alarm projection. |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Contracts/` | `AbCipDriverOptions`, `AbCipDeviceOptions`, `AbCipTagDefinition` / `AbCipStructureMember`, and the `AbCipDataType` / `AbCipPlcFamily` enums bound from the driver's `DriverConfig` JSON. |
|
||||
|
||||
Per family the `AbCipPlcFamilyProfile` (`PlcFamilies/AbCipPlcFamilyProfile.cs`)
|
||||
supplies the libplctag `plc` attribute, default CIP path, ConnectionSize, and
|
||||
request-packing / connected-messaging quirks — ControlLogix is the baseline and
|
||||
each other family is a delta (Micro800 is unconnected-only with no backplane
|
||||
routing; GuardLogix shares the ControlLogix wire protocol with a tag-level safety
|
||||
partition).
|
||||
|
||||
## Capability Surface
|
||||
|
||||
`AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource, IDisposable, IAsyncDisposable`
|
||||
(`Driver.AbCip/AbCipDriver.cs`). Compared with the Modbus and AB Legacy drivers'
|
||||
capability surface, it additionally implements **`IAlarmSource`**.
|
||||
|
||||
| Capability | Implementation entry point | Notes |
|
||||
|------------|---------------------------|-------|
|
||||
| `ITagDiscovery` | `DiscoverAsync` | Emits pre-declared tags under per-device folders; UDT tags with declared `Members` fan out into a sub-folder + one variable per member. With `EnableControllerBrowse` the `@tags` symbol table is walked into a `Discovered/` folder (system/module/routine tags filtered out). |
|
||||
| `IReadable` | `ReadAsync` → `ReadGroupAsync` / `ReadSingleAsync` | Per-tag reads; opt-in whole-UDT grouping (`EnableDeclarationOnlyUdtGrouping`) collapses N member reads into one. |
|
||||
| `IWritable` | `WriteAsync` | BOOL-within-DINT writes do a per-parent read-modify-write under a lock; `SafetyTag` and non-writable tags return `BadNotWritable`. |
|
||||
| `ISubscribable` | `SubscribeAsync` driven by the shared `PollGroupEngine` | CIP has no push model — subscriptions become polling groups. |
|
||||
| `IHostConnectivityProbe` | `ProbeLoopAsync` + `GetHostStatuses` | One probe loop per device reading `Probe.ProbeTagPath`; no path configured ⇒ a warning is logged and the device stays `Unknown`. |
|
||||
| `IPerCallHostResolver` | `ResolveHost` | Routes each call to the tag's `DeviceHostAddress`, which doubles as the breaker key for the resilience pipeline, so one dead PLC trips only its own breaker. |
|
||||
| `IAlarmSource` | `AbCipAlarmProjection` (ALMD) | Opt-in via `EnableAlarmProjection`. When it is off (the default), the subscribe path is a no-op, so capability negotiation still works. |
|
||||
|
||||
## Addressing Model
|
||||
|
||||
Per-device host addresses are the canonical `ab://gateway[:port]/cip-path` form
|
||||
parsed by `AbCipHostAddress.TryParse` (`AbCipHostAddress.cs`). The parsed
|
||||
`CipPath` is handed to libplctag verbatim, so no wire-layer translation is
|
||||
needed:
|
||||
|
||||
| Form | Meaning |
|
||||
|------|---------|
|
||||
| `ab://10.0.0.5/1,0` | Single-chassis ControlLogix, CPU in slot 0 |
|
||||
| `ab://10.0.0.5/1,2,2,192.168.50.20,1,0` | Bridged ControlLogix (routed path) |
|
||||
| `ab://10.0.0.5/` | Micro800 / no-backplane device (empty path) |
|
||||
| `ab://10.0.0.5:44818/1,0` | Explicit EIP port (default 44818) |
|
||||
|
||||
Tags carry a Logix symbolic `TagPath` (controller or program scope). UDT-typed
|
||||
tags are declared as `AbCipDataType.Structure` with a `Members` list; discovery
|
||||
fans each member out as `{tag.Name}.{member.Name}`, and the read planner can
|
||||
collapse a batch of members into one whole-UDT read when
|
||||
`EnableDeclarationOnlyUdtGrouping` is set. The whole-UDT fast path is opt-in
|
||||
because Studio 5000 may reorder members vs declaration order; decoding at
|
||||
declaration-order offsets against a reordered layout yields silently-plausible
|
||||
wrong numbers.
|
||||
|
||||
## Configuration
|
||||
|
||||
`AbCipDriverOptions` (`Driver.AbCip.Contracts/AbCipDriverOptions.cs`) binds from
|
||||
the driver's `DriverConfig` JSON. Key fields:
|
||||
|
||||
- **`Devices`** — one `AbCipDeviceOptions` per PLC (`HostAddress`, `PlcFamily`, optional `DeviceName`, per-device `AllowPacking` / `ConnectionSize` overrides).
|
||||
- **`Tags`** — pre-declared `AbCipTagDefinition` list; `Members` for UDT fan-out, `SafetyTag` for GuardLogix safety-partition tags.
|
||||
- **`Probe`** — connectivity-probe `Enabled` / `Interval` / `Timeout` / `ProbeTagPath`.
|
||||
- **Discovery** — `EnableControllerBrowse` (`@tags` walk) and `EnableDeclarationOnlyUdtGrouping` (whole-UDT read fast path).
|
||||
- **Alarms** — `EnableAlarmProjection` + `AlarmPollInterval` for the ALMD projection.
|
||||
|
||||
Full per-field descriptions live in `AbCipDriverOptions.cs`. The JSON skeleton is
|
||||
reproduced in [docs/v2/driver-specs.md §3](../v2/driver-specs.md).
|
||||
|
||||
## Alarm Projection
|
||||
|
||||
`IAlarmSource` is served by `AbCipAlarmProjection`, which polls each subscribed
|
||||
ALMD UDT's `InFaulted` + `Severity` members at `AlarmPollInterval` and fires
|
||||
`OnAlarmEvent` on raise/clear transitions. It is **ALMD-only** in this pass (ALMA
|
||||
analog alarms are a follow-up) and **disabled by default** — shops running FT
|
||||
Alarm & Events should keep it off and take alarms through the native route, since
|
||||
the projection semantics don't exactly mirror Rockwell FT A&E.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Tests/` cover the driver, host-address parser, UDT planner, and alarm projection via fake tag runtimes.
|
||||
- **Integration tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.IntegrationTests/` run against the `ab_server` Docker fixture. See [AbServer-Test-Fixture.md](AbServer-Test-Fixture.md) for the coverage map and the `AB_SERVER_ENDPOINT` wiring.
|
||||
- **Manual client** — [Driver.AbCip.Cli.md](../Driver.AbCip.Cli.md).
|
||||
|
||||
## Operational Notes
|
||||
|
||||
- **Native heap is invisible to the GC.** `GetMemoryFootprint()` reports CLR allocations only; libplctag's native `Tag` heap does not show up there. Watch whole-process RSS, and use `ReinitializeAsync` (tears down + re-creates every device's libplctag handles) as the remediation for native-heap growth.
|
||||
- **Handle eviction on failure** — a non-zero libplctag status or a transport exception evicts the cached tag runtime so the next read/write re-creates a fresh handle, mirroring the probe loop's recreate-on-failure behaviour.
|
||||
- **Declaration-only UDT grouping is a footgun unless verified** — only enable `EnableDeclarationOnlyUdtGrouping` when every UDT's member declaration order has been hand-verified against the controller's compiled layout.
|
||||
@@ -6,26 +6,26 @@ MicroLogix / PLC-5 / LogixPccc-mode.
|
||||
**TL;DR:** Docker integration-test scaffolding lives at
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/` (task #224),
|
||||
reusing the AB CIP `ab_server` image in PCCC mode with per-family
|
||||
compose profiles (`slc500` / `micrologix` / `plc5`). Scaffold passes
|
||||
the skip-when-absent contract cleanly. **Wire-level round-trip against
|
||||
`ab_server` PCCC mode currently fails** with `BadCommunicationError`
|
||||
on read/write (verified 2026-04-20) — ab_server's PCCC server-side
|
||||
coverage is narrower than libplctag's PCCC client expects. The smoke
|
||||
tests target the correct shape for real hardware + should pass when
|
||||
`AB_LEGACY_ENDPOINT` points at a real SLC 5/05 / MicroLogix. Unit tests
|
||||
via `FakeAbLegacyTag` still carry the contract coverage.
|
||||
compose profiles (`slc500` / `micrologix` / `plc5`). The smoke tests pass
|
||||
for N-file (Int16), F-file (Float32), and L-file (Int32) reads across all
|
||||
three families when `AB_LEGACY_CIP_PATH=1,0` (the default). The earlier
|
||||
`BadCommunicationError` was traced to `ab_server` requiring a non-empty CIP
|
||||
routing path before forwarding to the PCCC dispatcher — the `/1,0` workaround
|
||||
resolves it (see `Docker/README.md §Known limitations`). Residual gap: bit-file
|
||||
writes (`B3:0/5`) still surface `0x803D0000` against `ab_server`. Unit tests
|
||||
via `FakeAbLegacyTag` carry full contract coverage for all paths.
|
||||
|
||||
## What the fixture is
|
||||
|
||||
**Integration layer** (task #224, scaffolded with a known ab_server
|
||||
gap):
|
||||
**Integration layer** (task #224):
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/` with
|
||||
`AbLegacyServerFixture` (TCP-probes `localhost:44818`) + three smoke
|
||||
tests (parametric read across families, SLC500 write-then-read). Reuses
|
||||
the AB CIP `otopcua-ab-server:libplctag-release` image via a relative
|
||||
`build:` context in `Docker/docker-compose.yml` — one image, different
|
||||
`--plc` flags. See `Docker/README.md` §Known limitations for the
|
||||
ab_server PCCC round-trip gap + resolution paths.
|
||||
`AbLegacyServerFixture` (TCP-probes `10.100.0.35:44818` — the shared Docker
|
||||
host; override with `AB_LEGACY_ENDPOINT`) + three smoke tests (parametric read
|
||||
across families, SLC500 write-then-read). Reuses the AB CIP
|
||||
`otopcua-ab-server:libplctag-release` image via a relative `build:` context in
|
||||
`Docker/docker-compose.yml` — one image, different `--plc` flags. See
|
||||
`Docker/README.md §Known limitations` for the CIP-path gate + bit-file write
|
||||
gap.
|
||||
|
||||
**Unit layer**: `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/` is
|
||||
still the primary coverage. All tests are tagged `[Trait("Category", "Unit")]`.
|
||||
@@ -93,13 +93,12 @@ cover the common ones but uncommon ones (`R` counters, `S` status files,
|
||||
|
||||
## Follow-up candidates
|
||||
|
||||
1. **Expand ab_server PCCC coverage** — the smoke suite passes today
|
||||
for N (Int16), F (Float32), and L (Int32) files across SLC500 /
|
||||
MicroLogix / PLC-5 modes with the `/1,0` cip-path workaround in
|
||||
place. Known residual gap: bit-file writes (`B3:0/5`) surface
|
||||
`0x803D0000`. Contributing a patch to `libplctag/libplctag` to close
|
||||
this + documenting ab_server's empty-path rejection in its README
|
||||
would remove the last Docker-vs-hardware divergences.
|
||||
1. **Close residual ab_server bit-file write gap** — N (Int16), F (Float32),
|
||||
and L (Int32) files round-trip cleanly across SLC500 / MicroLogix / PLC-5
|
||||
modes with the `/1,0` cip-path workaround in place. Remaining gap: bit-file
|
||||
writes (`B3:0/5`) surface `0x803D0000` against `ab_server --plc=SLC500`.
|
||||
Contributing a patch to `libplctag/libplctag` to close this would remove
|
||||
the last Docker-vs-hardware divergence for bit writes.
|
||||
2. **Rockwell RSEmulate 500 golden-box tier** — Rockwell's real emulator
|
||||
for SLC/MicroLogix/PLC-5. Would close UDT-equivalent (integer-file
|
||||
indirection), timer/counter decomposition, and real ladder execution
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
# AB Legacy Driver
|
||||
|
||||
In-process native-protocol driver that exposes legacy Allen-Bradley PLCs —
|
||||
**SLC 500**, **MicroLogix**, **PLC-5**, and Logix-via-PCCC — as OPC UA nodes. It
|
||||
runs inside the OtOpcUa server's .NET 10 AnyCPU process and speaks PCCC over
|
||||
EtherNet/IP through the same libplctag.NET wrapper as the AB CIP driver, but
|
||||
addresses data by **file** (data-table) rather than by symbolic tag. One driver
|
||||
instance can serve many devices; per-device routing is keyed on the canonical
|
||||
`ab://gateway[:port]/cip-path` host-address string. PCCC has no native push
|
||||
model, so subscriptions are a polling overlay on top of `IReadable`.
|
||||
|
||||
For the driver spec (capability surface, config shape, payload limits), see
|
||||
[docs/v2/driver-specs.md §4](../v2/driver-specs.md). For the manual test client,
|
||||
see [Driver.AbLegacy.Cli.md](../Driver.AbLegacy.Cli.md). For the integration
|
||||
fixture coverage map, see [AbLegacy-Test-Fixture.md](AbLegacy-Test-Fixture.md).
|
||||
|
||||
## Project Layout
|
||||
|
||||
| Project | Role |
|
||||
|---------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/` | The driver — `AbLegacyDriver`, the libplctag runtime wrapper, the PCCC file-address parser (`AbLegacyAddress`), the host-address parser, and the status mapper. |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Contracts/` | `AbLegacyDriverOptions`, `AbLegacyDeviceOptions`, `AbLegacyTagDefinition`, and the `AbLegacyDataType` / `AbLegacyPlcFamily` / `AbLegacyPlcFamilyProfile` records bound from the driver's `DriverConfig` JSON. |
|
||||
|
||||
Per family the `AbLegacyPlcFamilyProfile` supplies the libplctag `plc` attribute,
|
||||
default CIP path, max-payload bytes, and the `SupportsStringFile` /
|
||||
`SupportsLongFile` capability flags. MicroLogix uses direct EIP (empty default
|
||||
path); neither MicroLogix nor PLC-5 ships L-files (PLC-5 predates them entirely).
|
||||
Tag types are validated against the device's profile at init time — declaring a
|
||||
`Long` or `String` tag on a family that can't support it fails fast with a clear
|
||||
message.
|
||||
|
||||
## Capability Surface
|
||||
|
||||
`AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscovery, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IDisposable, IAsyncDisposable`
|
||||
(`Driver.AbLegacy/AbLegacyDriver.cs`). There is **no `IAlarmSource`** — unlike the
|
||||
AB CIP driver, PCCC has no ALMD instruction to project, so alarms are out of
|
||||
scope.
|
||||
|
||||
| Capability | Implementation entry point | Notes |
|
||||
|------------|---------------------------|-------|
|
||||
| `ITagDiscovery` | `DiscoverAsync` | Emits pre-declared tags under per-device folders. Tags are single-element today (`IsArray` hard-wired false); multi-element file ranges are a tracked follow-up. |
|
||||
| `IReadable` | `ReadAsync` | Per-tag reads are serialized per cached runtime under a lock (a libplctag `Tag` handle is not concurrency-safe across the server read path + poll loop). |
|
||||
| `IWritable` | `WriteAsync` | Bit-within-word writes (N-file `N7:0/3`, B-file bits) do a per-parent-word read-modify-write under a lock. Non-writable tags return `BadNotWritable`. |
|
||||
| `ISubscribable` | `SubscribeAsync` driven by the shared `PollGroupEngine` | No push model — subscriptions become polling groups. |
|
||||
| `IHostConnectivityProbe` | `ProbeLoopAsync` + `GetHostStatuses` | One probe loop per device reading `Probe.ProbeAddress`; transitions log Warning (down) / Information (recover). |
|
||||
| `IPerCallHostResolver` | `ResolveHost` | Routes each call to the tag's `DeviceHostAddress`; unknown references fall back to the first device, never throwing (per the interface contract). |
|
||||
|
||||
## Addressing Model
|
||||
|
||||
Per-device host addresses are the canonical `ab://gateway[:port]/cip-path` form
|
||||
parsed by `AbLegacyHostAddress.TryParse`. When the parsed CIP path is empty the
|
||||
family profile's default path is used (e.g. SLC 500 gets `1,0`; MicroLogix stays
|
||||
empty for direct EIP).
|
||||
|
||||
Tags carry a PCCC **file address** parsed by `AbLegacyAddress` (`AbLegacyAddress.cs`)
|
||||
— file letter + file number + word number, with an optional bit index (`/N`) or
|
||||
structured sub-element (`.ACC`, `.PRE`, …). The string is passed straight through
|
||||
to libplctag's `name=` attribute; the parser validates shape and surfaces the
|
||||
pieces for driver-side routing (e.g. deciding a bit needs read-modify-write):
|
||||
|
||||
| Form | Meaning |
|
||||
|------|---------|
|
||||
| `N7:0` | Integer file 7, word 0 (signed 16-bit) |
|
||||
| `F8:0` | Float file 8, word 0 (32-bit IEEE-754) |
|
||||
| `B3:0/0` | Bit file 3, word 0, bit 0 |
|
||||
| `L9:0` | Long-integer file (SLC 5/05+, 32-bit) |
|
||||
| `ST9:0` | String file (82-byte fixed-length) |
|
||||
| `T4:0.ACC` / `C5:0.PRE` | Timer / counter sub-element |
|
||||
| `I:0/0` / `O:1/2` / `S:1` | Input / output / status system files (no file number) |
|
||||
|
||||
`AbLegacyDataType` covers the corresponding PCCC types: `Bit`, `Int` (N), `Long`
|
||||
(L), `Float` (F), `AnalogInt` (A), `String` (ST), and the `TimerElement` /
|
||||
`CounterElement` / `ControlElement` sub-element families. The parser enforces
|
||||
PCCC structural rules — bit-addressing only on 16/32-bit element files,
|
||||
sub-elements only on T/C/R files, no file number on I/O/S — rejecting malformed
|
||||
addresses before they reach libplctag.
|
||||
|
||||
## Configuration
|
||||
|
||||
`AbLegacyDriverOptions` (`Driver.AbLegacy.Contracts/AbLegacyDriverOptions.cs`)
|
||||
binds from the driver's `DriverConfig` JSON:
|
||||
|
||||
- **`Devices`** — one `AbLegacyDeviceOptions` per PLC (`HostAddress`, `PlcFamily`, optional `DeviceName`).
|
||||
- **`Tags`** — pre-declared `AbLegacyTagDefinition` list (`Name`, `DeviceHostAddress`, `Address`, `DataType`, `Writable`, `WriteIdempotent`).
|
||||
- **`Probe`** — connectivity-probe `Enabled` / `Interval` / `Timeout` / `ProbeAddress`.
|
||||
|
||||
Full per-field descriptions live in the contracts assembly. The JSON skeleton is
|
||||
reproduced in [docs/v2/driver-specs.md §4](../v2/driver-specs.md).
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/` cover the driver, the PCCC address parser, and the host-address parser via fake tag runtimes.
|
||||
- **Integration tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.IntegrationTests/` run against the AB Legacy Docker fixture. See [AbLegacy-Test-Fixture.md](AbLegacy-Test-Fixture.md) for the coverage map.
|
||||
- **Manual client** — [Driver.AbLegacy.Cli.md](../Driver.AbLegacy.Cli.md).
|
||||
|
||||
## Operational Notes
|
||||
|
||||
- **Native heap is invisible to the GC.** As with AB CIP, `GetMemoryFootprint()` reports CLR allocations only; watch whole-process RSS and use `ReinitializeAsync` to recycle libplctag handles.
|
||||
- **PCCC reconnect is more expensive than CIP** — legacy PLCs have no connection multiplexing, so the resilience pipeline should use longer backoff than for AB CIP (see [docs/v2/driver-specs.md §4](../v2/driver-specs.md)).
|
||||
- **Single-element addressing today** — a PCCC file is inherently an array (an N7 file is up to 256 words), but the current tag surface addresses one element per tag; range-spanning tags must be enumerated element-by-element until multi-element addressing lands.
|
||||
@@ -10,17 +10,20 @@ quirk. UDT / alarm / quirk behavior is verified only by unit tests with
|
||||
|
||||
## What the fixture is
|
||||
|
||||
- **Binary**: `ab_server` — a C program in libplctag's
|
||||
`src/tools/ab_server/` ([libplctag/libplctag](https://github.com/libplctag/libplctag),
|
||||
MIT).
|
||||
- **Binary**: `ab_server` — a C program from the upstream
|
||||
[libplctag/libplctag](https://github.com/libplctag/libplctag) repository
|
||||
(MIT license). It is **not** part of this repo's source tree; `Docker/Dockerfile`
|
||||
clones libplctag at a pinned tag and builds the `ab_server` CMake target in a
|
||||
multi-stage build.
|
||||
- **Launcher**: Docker (only supported path). `Docker/Dockerfile`
|
||||
multi-stage-builds `ab_server` from source against a pinned libplctag
|
||||
multi-stage-builds `ab_server` from source by cloning libplctag at a pinned
|
||||
tag + copies the binary into a slim runtime image.
|
||||
`Docker/docker-compose.yml` has per-family services (`controllogix`
|
||||
/ `compactlogix` / `micro800` / `guardlogix`); all bind `:44818`.
|
||||
- **Lifecycle**: `AbServerFixture` TCP-probes `127.0.0.1:44818` at
|
||||
collection init + records a skip reason when unreachable. Tests skip
|
||||
via `[AbServerFact]` / `[AbServerTheory]` which check the same probe.
|
||||
- **Lifecycle**: `AbServerFixture` TCP-probes `10.100.0.35:44818` (the shared
|
||||
Docker host) at collection init + records a skip reason when unreachable.
|
||||
Tests skip via `[AbServerFact]` / `[AbServerTheory]` which check the same
|
||||
probe.
|
||||
- **Profiles**: `KnownProfiles.{ControlLogix, CompactLogix, Micro800, GuardLogix}`
|
||||
in `AbServerProfile.cs` — thin Family + ComposeProfile + Notes records;
|
||||
the compose file is the canonical source of truth for which tags get
|
||||
@@ -71,12 +74,15 @@ Unit coverage: `AbCipAlarmProjectionTests` — fakes feed `InFaulted` /
|
||||
|
||||
### 3. Micro800 unconnected-only path
|
||||
|
||||
Micro800 profile `Notes`: *"ab_server has no --plc micro800 — falls back to
|
||||
controllogix emulation."*
|
||||
Micro800 profile `Notes`: *"--plc=Micro800 mode (unconnected-only, empty path).
|
||||
Driver-side enforcement verified in the unit suite."*
|
||||
|
||||
The empty routing path + unconnected-session requirement (PR 11) is unit-tested
|
||||
but never challenged at the CIP wire level. Real Micro800 (2080-series) on a
|
||||
lab rig would be the authoritative benchmark.
|
||||
The compose service boots `ab_server --plc=Micro800` with an empty routing path.
|
||||
The unconnected-session requirement (PR 11) is validated at the driver unit-test
|
||||
level via `FakeAbCipTagRuntime`; the wire-level contract (what happens when
|
||||
a connected-send arrives at a real Micro800 backplane) is not exercised by the
|
||||
simulator. Real Micro800 (2080-series) on a lab rig would be the authoritative
|
||||
benchmark.
|
||||
|
||||
### 4. GuardLogix safety subsystem
|
||||
|
||||
@@ -177,7 +183,7 @@ project is authored.
|
||||
| "Is my atomic read path wired correctly?" | yes | yes | yes | yes |
|
||||
| "Does whole-UDT grouping work?" | no | yes | **yes** | yes |
|
||||
| "Do ALMD alarms raise + clear?" | no | yes | **yes** | yes |
|
||||
| "Is Micro800 unconnected-only enforced wire-side?" | no (emulated as CLX) | partial | yes | yes (required) |
|
||||
| "Is Micro800 unconnected-only enforced wire-side?" | partial (--plc=Micro800 boots, but wire rejection untested) | partial | yes | yes (required) |
|
||||
| "Does GuardLogix reject non-safety writes on safety tags?" | no | no | yes (Emulate 5580) | yes |
|
||||
| "Does CompactLogix refuse oversized ConnectionSize?" | no | partial | yes (5370 firmware) | yes |
|
||||
| "Does BOOL-in-DINT RMW race against concurrent writers?" | no | yes | partial | yes (stress) |
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
Coverage map + gap inventory for the FANUC FOCAS2 CNC driver.
|
||||
|
||||
**Status:** as of 2026-04-24, OtOpcUa speaks FOCAS2 directly over TCP
|
||||
via the pure-managed [`Focas.Wire`](https://github.com/Ladder99/focas-mock/tree/main/dotnet/Focas.Wire)
|
||||
OtOpcUa speaks FOCAS2 directly over TCP via the pure-managed
|
||||
[`Focas.Wire`](https://github.com/Ladder99/focas-mock/tree/main/dotnet/Focas.Wire)
|
||||
client. Integration tests run the managed driver end-to-end against the
|
||||
vendored `focas-mock` Python server (at
|
||||
[`tests/.../Docker/focas-mock/`](../../tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/Docker/focas-mock/VENDORED.md))
|
||||
@@ -51,8 +51,9 @@ message naming the CNC series + documented limit.
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/` drives the
|
||||
managed `FocasDriver` end-to-end. A single gate:
|
||||
|
||||
**Docker compose up** — tests skip when the TCP probe to
|
||||
`localhost:8193` fails with a pointer to the compose command.
|
||||
**Docker compose up** — tests skip when the TCP probe fails, with a
|
||||
pointer to the compose command. The endpoint defaults to `localhost:8193`
|
||||
and is overridable via `OTOPCUA_FOCAS_SIM_ENDPOINT`.
|
||||
|
||||
When the mock is up, `WireFocasClient` dials it over TCP exactly like a
|
||||
real CNC, and the mock's native FOCAS Ethernet responder replies with
|
||||
@@ -137,10 +138,10 @@ Or use `scripts/integration/run-focas.ps1` which wraps compose up / test
|
||||
— per-series compose profiles
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/FocasSimFixture.cs`
|
||||
— collection fixture + mock admin API client
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/Series/FixedTreePopulatesTests.cs`
|
||||
— fixed-tree end-to-end tests
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/Series/WireBackendTests.cs`
|
||||
— pure-wire-backend end-to-end tests
|
||||
— fixed-tree end-to-end tests (identity / axes / spindle / program / timers)
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/Series/WireBackendCoverageTests.cs`
|
||||
— broader wire-backend coverage: PARAM / MACRO / PMC reads, `DiscoverAsync`, `SubscribeAsync`, `IAlarmSource` raise + clear, `IHostConnectivityProbe`
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Tests/FakeFocasClient.cs` —
|
||||
in-process unit fake
|
||||
- `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS/Wire/WireFocasClient.cs` — the
|
||||
|
||||
@@ -4,7 +4,7 @@ The Galaxy driver bridges OtOpcUa to AVEVA System Platform (Wonderware) Galaxies
|
||||
|
||||
For the driver spec (capability surface, config shape, addressing), see [docs/v2/driver-specs.md §1](../v2/driver-specs.md). For the gateway setup recipe, see [docs/v2/Galaxy.ParityRig.md](../v2/Galaxy.ParityRig.md). For tracing, metrics, and soak profile, see [docs/v2/Galaxy.Performance.md](../v2/Galaxy.Performance.md).
|
||||
|
||||
> **Note**: the related drivers `Galaxy-Repository.md` and `Galaxy-Test-Fixture.md` describe the previous v1 / out-of-process topology and are being moved to `docs/v1/` by a parallel cleanup track. Use `Galaxy.ParityRig.md` and the `mxaccessgw` repo for current testing.
|
||||
> **Note**: the related docs [`Galaxy-Repository.md`](../v1/drivers/Galaxy-Repository.md) and [`Galaxy-Test-Fixture.md`](../v1/drivers/Galaxy-Test-Fixture.md) describe the previous v1 / out-of-process topology and now live under `docs/v1/drivers/`. For current testing use [`Galaxy.ParityRig.md`](../v2/Galaxy.ParityRig.md) and the `mxaccessgw` repo.
|
||||
|
||||
## Architecture
|
||||
|
||||
@@ -65,7 +65,7 @@ Project root files:
|
||||
|
||||
## Capability Surface
|
||||
|
||||
`GalaxyDriver : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IDisposable`.
|
||||
`GalaxyDriver : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IAlarmSource, IDisposable, IAsyncDisposable`.
|
||||
|
||||
| Capability | Implementation entry point |
|
||||
|------------|---------------------------|
|
||||
@@ -75,6 +75,7 @@ Project root files:
|
||||
| `IWritable` | `Runtime/GatewayGalaxyDataWriter.cs` |
|
||||
| `ISubscribable` | `Runtime/GatewayGalaxySubscriber.cs` (driven by `EventPump`) |
|
||||
| `IHostConnectivityProbe` | `Health/HostStatusAggregator.cs` |
|
||||
| `IAlarmSource` | `Runtime/GatewayGalaxyAlarmFeed.cs` (transitions) + `Runtime/GatewayGalaxyAlarmAcknowledger.cs` (acks) |
|
||||
|
||||
## Configuration
|
||||
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
# Wonderware Historian Backend
|
||||
|
||||
The Wonderware Historian backend is **not a tag driver** — it has no address
|
||||
space, no `IDriver` lifecycle, and exposes no PLC. It is a **server-side
|
||||
historian sink**: an optional sidecar that gives OtOpcUa read access to AVEVA
|
||||
System Platform (Wonderware) Historian history and a write-back path for alarm
|
||||
events. It runs only when `Historian:Wonderware:Enabled=true`.
|
||||
|
||||
For the sidecar's place in a deployment, see
|
||||
[ServiceHosting.md](../ServiceHosting.md). For the alarm-history store-and-forward
|
||||
flow that drains into it, see [AlarmHistorian.md](../AlarmHistorian.md).
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
+-------------------------------------------+
|
||||
| OtOpcUa Host (.NET 10 AnyCPU) |
|
||||
| Server.History.IHistoryRouter --read--+--+
|
||||
| Core.AlarmHistorian.SqliteStore | |
|
||||
| AndForwardSink --write----+--+
|
||||
| WonderwareHistorianClient (.NET 10) | |
|
||||
+-------------------------------------------+ |
|
||||
| named pipe
|
||||
MessagePack frames | (shared secret + allowed-SID)
|
||||
v
|
||||
+-------------------------------------------+
|
||||
| OtOpcUaWonderwareHistorian (sidecar) |
|
||||
| net48 / x64 |
|
||||
| PipeServer + HistorianFrameHandler |
|
||||
| HistorianDataSource (reads) |
|
||||
| SdkAlarmHistorianWriteBackend (writes) |
|
||||
| aahClientManaged / HistorianAccess |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
|
||||
The split exists because the AVEVA Historian SDK (`aahClientManaged` +
|
||||
native `aahClient.dll`) is .NET Framework 4.8 / x64 — so it lives out-of-process
|
||||
in the sidecar, and everything in the OtOpcUa host stays .NET 10 AnyCPU. The
|
||||
host never references the SDK; it speaks the pipe contract only.
|
||||
|
||||
## Project split
|
||||
|
||||
| Project | Target | Role |
|
||||
|---------|--------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/` | net48 / x64 | The **sidecar** (`OutputType=Exe`). Hosts the named-pipe server, the historian reader, and the alarm-write backend bound to the AVEVA SDK |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/` | net10.0 | `WonderwareHistorianClient` — the in-host pipe client consumed by the history router and the alarm sink |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/` | net10.0 | `WonderwareHistorianClientOptions` (pipe name, shared secret, timeouts) |
|
||||
|
||||
> The sidecar csproj targets **net48 / x64** (`PlatformTarget=x64`) — the AVEVA Historian
|
||||
> 2020 SDK ships an x64 `aahClientManaged` build; the earlier x86 default was an
|
||||
> inherited v1 artifact, not a constraint of the Historian SDK.
|
||||
|
||||
## What it does
|
||||
|
||||
The sidecar exposes two surfaces, both over the same named pipe:
|
||||
|
||||
### Read path — `IHistorianDataSource`
|
||||
|
||||
`HistorianDataSource` (in the sidecar) reads history through the
|
||||
`aahClientManaged` SDK; `WonderwareHistorianClient` (in the host) implements
|
||||
`IHistorianDataSource` and maps returned samples back to OPC UA `DataValue`s for
|
||||
`Server.History.IHistoryRouter`. The read surface is:
|
||||
|
||||
| Call | Maps to |
|
||||
|------|---------|
|
||||
| `ReadRawAsync` | Raw historical samples for a tag over a time range |
|
||||
| `ReadProcessedAsync` / `ReadAggregateAsync` | Aggregated samples at an interval |
|
||||
| `ReadAtTimeAsync` | Samples at specific timestamps |
|
||||
| `ReadEventsAsync` | Historical events for a source |
|
||||
| `GetHealthSnapshot` | Connection health for the host-side health surface |
|
||||
|
||||
### Write path — alarm-historian write-back
|
||||
|
||||
`WonderwareHistorianClient` also implements `IAlarmHistorianWriter`. Alarm events
|
||||
are drained into the sidecar from `Core.AlarmHistorian.SqliteStoreAndForwardSink`
|
||||
and persisted by `SdkAlarmHistorianWriteBackend` via
|
||||
`HistorianAccess.AddStreamedValue(HistorianEvent, out HistorianAccessError)`. The
|
||||
production writer is wrapped by `AahClientManagedAlarmEventWriter`, which handles
|
||||
batch orchestration and per-event `HistorianAccessError` outcome classification
|
||||
(connection-class errors are retryable; malformed-argument errors are not).
|
||||
|
||||
The alarm write path can be disabled independently of reads by setting
|
||||
`OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED=false` — the sidecar then rejects
|
||||
`WriteAlarmEvents` frames while still serving history reads.
|
||||
|
||||
## Hosting and IPC
|
||||
|
||||
- **Process**: `OtOpcUaWonderwareHistorian`, installed/managed by
|
||||
`scripts/install/` (`Install-Services.ps1 -InstallWonderwareHistorian`).
|
||||
- **Spawn config**: the supervisor passes the pipe name, the allowed server
|
||||
principal SID, and a per-process shared secret via environment
|
||||
(`OTOPCUA_HISTORIAN_PIPE`, `OTOPCUA_ALLOWED_SID`, `OTOPCUA_HISTORIAN_SECRET`);
|
||||
Historian connection settings come from `OTOPCUA_HISTORIAN_SERVER` /
|
||||
`_PORT` / `_INTEGRATED` / `_USER` / `_PASS` etc. (see
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`).
|
||||
- **Pipe-only mode**: when `OTOPCUA_HISTORIAN_ENABLED` is not `true`, the sidecar boots
|
||||
without loading the SDK at all — used for smoke and IPC tests.
|
||||
- **Wire**: MessagePack-framed request/reply; the named-pipe ACL restricts the
|
||||
pipe to the allowed SID and the client proves the shared secret in a Hello
|
||||
frame. The client owns a single channel with one in-flight call at a time and
|
||||
retries a transport failure once before propagating — broader backoff is the
|
||||
caller's responsibility.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Sidecar unit tests** —
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests/` cover the
|
||||
reader, the alarm-write backend outcome classification, and the pipe-frame
|
||||
handler with a faked SDK seam.
|
||||
- **Client unit tests** —
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests/`
|
||||
cover the pipe client + framing against an in-process duplex pipe pair.
|
||||
|
||||
## Further reading
|
||||
|
||||
- [ServiceHosting.md](../ServiceHosting.md) — where the sidecar fits in a
|
||||
deployment and how it's installed
|
||||
- [AlarmHistorian.md](../AlarmHistorian.md) — the alarm store-and-forward flow
|
||||
that feeds the write-back path
|
||||
@@ -3,11 +3,11 @@
|
||||
Coverage map + gap inventory for the Modbus TCP driver's integration-test
|
||||
harness backed by `pymodbus` simulator profiles per PLC family.
|
||||
|
||||
**TL;DR:** Modbus is the best-covered driver — a real `pymodbus` server on
|
||||
localhost with per-family seed-register profiles, plus a skip-gate when the
|
||||
simulator port isn't reachable. Covers DL205 / Mitsubishi MELSEC / Siemens
|
||||
S7-1500 family quirks end-to-end. Gaps are mostly error-path + alarm/history
|
||||
shaped (neither is a Modbus-side concept).
|
||||
**TL;DR:** Modbus is the best-covered driver — a real `pymodbus` server on the
|
||||
shared Docker host (`10.100.0.35:5020`) with per-family seed-register profiles,
|
||||
plus a skip-gate when the simulator port isn't reachable. Covers DL205 /
|
||||
Mitsubishi MELSEC / Siemens S7-1500 family quirks end-to-end. Gaps are mostly
|
||||
error-path + alarm/history shaped (neither is a Modbus-side concept).
|
||||
|
||||
## What the fixture is
|
||||
|
||||
@@ -16,8 +16,9 @@ shaped (neither is a Modbus-side concept).
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.IntegrationTests/Docker/`.
|
||||
Docker is the only supported launch path.
|
||||
- **Lifecycle**: `ModbusSimulatorFixture` (collection-scoped) TCP-probes
|
||||
`localhost:5020` on first use. `MODBUS_SIM_ENDPOINT` env var overrides the
|
||||
endpoint so the same suite can target a real PLC.
|
||||
`10.100.0.35:5020` (the shared Docker host) on first use.
|
||||
`MODBUS_SIM_ENDPOINT` env var overrides the endpoint so the same suite can
|
||||
target a real PLC or a locally-running container.
|
||||
- **Profiles**: `DL205Profile`, `MitsubishiProfile`, `S7_1500Profile` —
|
||||
each composes device-specific register-format + quirk-seed JSON for pymodbus.
|
||||
Profile JSONs live under `Docker/profiles/` and are baked into the image.
|
||||
@@ -102,8 +103,9 @@ Not a Modbus concept. Driver doesn't implement `IAlarmSource` or
|
||||
|
||||
## Follow-up candidates
|
||||
|
||||
1. Add `MODBUS_SIM_ENDPOINT` override documentation to
|
||||
`docs/v2/test-data-sources.md` so operators can point the suite at a lab rig.
|
||||
1. Add `MODBUS_SIM_ENDPOINT` cross-reference to
|
||||
`docs/v2/test-data-sources.md` (already documented in this page + CLAUDE.md;
|
||||
the v2 page could link here for the complete env-var table).
|
||||
2. ~~Extend `pymodbus` profiles to inject exception responses~~ — **shipped**
|
||||
via the `exception_injection` compose profile + standalone
|
||||
`exception_injector.py` server. Rules in
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
# Modbus Driver
|
||||
|
||||
In-process native-protocol driver that exposes Modbus-TCP devices as OPC UA
|
||||
variable nodes. It runs inside the OtOpcUa server's .NET 10 AnyCPU process and
|
||||
speaks Modbus-TCP directly over a socket — no gateway, no sidecar, no bitness
|
||||
constraint. Modbus has no discovery protocol and no native push model, so the
|
||||
address space is built entirely from pre-declared tags and subscriptions are a
|
||||
polling overlay on top of `IReadable`.
|
||||
|
||||
For the driver spec (capability surface, config shape, byte-order matrix), see
|
||||
[docs/v2/driver-specs.md §2](../v2/driver-specs.md). For the manual test client,
|
||||
see [Driver.Modbus.Cli.md](../Driver.Modbus.Cli.md). For the integration fixture
|
||||
coverage map, see [Modbus-Test-Fixture.md](Modbus-Test-Fixture.md).
|
||||
|
||||
## Project Layout
|
||||
|
||||
| Project | Role |
|
||||
|---------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/` | The driver — `ModbusDriver` plus the `ModbusTcpTransport` socket layer, the connectivity probe, and the auto-prohibition planner. |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing/` | Shared address grammar — `ModbusAddressParser` and the `ModbusRegion` / `ModbusDataType` / `ModbusByteOrder` / `ModbusFamily` enums. Lives in its own assembly so the Admin UI and the parser can speak about addresses without a transport dependency. |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts/` | `ModbusDriverOptions` + `ModbusTagDefinition` config records bound from the driver's `DriverConfig` JSON. |
|
||||
|
||||
## Capability Surface
|
||||
|
||||
`ModbusDriver : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IDisposable, IAsyncDisposable`
|
||||
(`Driver.Modbus/ModbusDriver.cs`). There is **no `IAlarmSource`** and no
|
||||
`IHistoryProvider` — the Modbus protocol expresses neither, so those capabilities
|
||||
are out of scope by design.
|
||||
|
||||
| Capability | Implementation entry point | Notes |
|
||||
|------------|---------------------------|-------|
|
||||
| `ITagDiscovery` | `DiscoverAsync` | Emits one `Modbus/{tag}` variable per pre-declared tag; Modbus has no browse protocol, so the driver returns exactly the configured `Tags`. |
|
||||
| `IReadable` | `ReadAsync` → `ReadOneAsync` / `ReadCoalescedAsync` | FC01/FC02 for coils, FC03/FC04 for registers; auto-chunks reads past the per-device cap. |
|
||||
| `IWritable` | `WriteAsync` → `WriteOneAsync` | FC05/FC15 for coils, FC06/FC16 for registers; `BitInRegister` writes do a per-register read-modify-write under a lock. `DiscreteInputs` / `InputRegisters` are read-only and return `BadNotWritable`. |
|
||||
| `ISubscribable` | `SubscribeAsync` driven by the shared `PollGroupEngine` | No native push — subscriptions become per-tag polling groups with an optional per-tag `Deadband` filter. |
|
||||
| `IHostConnectivityProbe` | `ProbeLoopAsync` + `GetHostStatuses` | Periodic cheap FC03 at `Probe.ProbeAddress`; `HostName` is the `Host:Port` string surfaced to the Admin UI. |
|
||||
| `IPerCallHostResolver` | `ResolveHost` | Routes each call to a per-slave breaker key (`Host:Port/unit{UnitId}`) so a dead RTU slave behind a multi-unit gateway opens its own breaker. |
|
||||
|
||||
## Addressing Model
|
||||
|
||||
Every exposed register is a pre-declared `ModbusTagDefinition` (Region, Address,
|
||||
DataType, ByteOrder, …). Tag spreadsheets are typically authored as address
|
||||
strings parsed by `ModbusAddressParser` at config-bind time; the grammar is
|
||||
`<region><offset>[.<bit>][:<type>[<len>]][:<order>][:<count>]`:
|
||||
|
||||
| Form | Example | Meaning |
|
||||
|------|---------|---------|
|
||||
| Modicon digits | `40001` / `400001` | Holding register 0 (5- or 6-digit form), default Int16 |
|
||||
| Mnemonic prefix | `HR1` / `IR1` / `C100` / `DI5` | Region prefix + 1-based register number |
|
||||
| Bit suffix | `40001.5` | Bit 5 of holding register 0 (`BitInRegister`) |
|
||||
| Explicit type | `40001:F` / `40001:STR20` | Float32 / 20-char ASCII string |
|
||||
| Word order | `40001:F:CDAB` | Float32 with word-swap byte order |
|
||||
| Array | `40001:F:5` | Float32[5] (consumes HR[0..9]) |
|
||||
|
||||
The four regions (`Coils`, `DiscreteInputs`, `InputRegisters`,
|
||||
`HoldingRegisters`) map directly to function-code selection. The type codes are
|
||||
aligned with Wonderware DASMBTCP and the Ignition Modbus driver so pasted tag
|
||||
sheets translate without manual rewriting.
|
||||
|
||||
**Byte/word order** is the most common production misconfiguration. The four
|
||||
`ModbusByteOrder` mnemonics — `ABCD` (BigEndian, spec default), `CDAB`
|
||||
(WordSwap), `BADC` (ByteSwap), `DCBA` (FullReverse) — describe how bytes A/B/C/D
|
||||
appear across consecutive registers when decoding a multi-register value.
|
||||
|
||||
## Device Profiles
|
||||
|
||||
`ModbusDriverOptions.Family` selects the parser's family-native branch
|
||||
(`ModbusFamily`):
|
||||
|
||||
- **`Generic`** (default) — only Modicon (`4xxxx`) and mnemonic (`HR1`, `C100`) forms are accepted.
|
||||
- **`DL205`** — AutomationDirect DirectLOGIC. V-memory (octal) → HoldingRegisters, `Y`/`C` → Coils, `X`/`SP` → DiscreteInputs. Strings can be packed low-byte-first via `ModbusTagDefinition.StringByteOrder` (the grammar can't express this — see `ModbusStringByteOrder`).
|
||||
- **`MELSEC`** — Mitsubishi. D-registers → HoldingRegisters, `X` → DiscreteInputs, `Y`/`M` → Coils; the `MelsecSubFamily` selector switches Q/L/iQR (hex) vs FX (octal) X/Y interpretation.
|
||||
|
||||
Per-family register caps are honoured through `MaxRegistersPerRead` /
|
||||
`MaxRegistersPerWrite` / `MaxCoilsPerRead` (e.g. DL205/DL260 cap reads at 128,
|
||||
Mitsubishi Q at 64); the driver auto-chunks larger reads into consecutive
|
||||
requests.
|
||||
|
||||
## Coalesced Reads + Auto-Prohibition
|
||||
|
||||
When `MaxReadGap > 0` the read planner (`ReadCoalescedAsync`) groups tags in the
|
||||
same `(UnitId, Region)`, sorts by address, and merges near-adjacent register
|
||||
spans (gap ≤ `MaxReadGap`, total span ≤ the read cap) into a single FC03/FC04
|
||||
PDU, then slices the response back into per-tag values. If a coalesced read hits
|
||||
a Modbus exception (illegal/protected register), the offending range is recorded
|
||||
as **auto-prohibited** so the planner stops re-coalescing across it; the
|
||||
surviving members fall back to per-tag reads in the same scan. Setting
|
||||
`AutoProhibitReprobeInterval` starts a background loop that periodically retries
|
||||
prohibited ranges and uses bisection to narrow a multi-register prohibition down
|
||||
to the actual offending register(s). Per-tag escape hatch:
|
||||
`ModbusTagDefinition.CoalesceProhibited`.
|
||||
|
||||
## Configuration
|
||||
|
||||
`ModbusDriverOptions` (`Driver.Modbus.Contracts/ModbusDriverOptions.cs`) binds
|
||||
from the driver's `DriverConfig` JSON. Key fields:
|
||||
|
||||
- **Endpoint** — `Host`, `Port` (default 502), `UnitId`, `Timeout`. Per-tag `UnitId` overrides support multi-slave gateway topologies.
|
||||
- **`Tags`** — the pre-declared `ModbusTagDefinition` list; this *is* the address space.
|
||||
- **`Probe`** — connectivity-probe interval / timeout / probe register (default register 0).
|
||||
- **Read/write caps** — `MaxRegistersPerRead` (125), `MaxRegistersPerWrite` (123), `MaxCoilsPerRead` (2000), plus `MaxReadGap` and `AutoProhibitReprobeInterval` for coalescing.
|
||||
- **Function-code overrides** — `UseFC15ForSingleCoilWrites`, `UseFC16ForSingleRegisterWrites` for PLCs that only accept multi-write codes.
|
||||
- **Resilience** — `AutoReconnect`, `KeepAlive`, `IdleDisconnectTimeout`, `Reconnect` backoff, and `WriteOnChangeOnly` redundant-write suppression.
|
||||
|
||||
Full per-field descriptions live in `ModbusDriverOptions.cs`. The JSON skeleton
|
||||
is reproduced in [docs/v2/driver-specs.md §2](../v2/driver-specs.md).
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/` (driver behaviour via a fake transport) and `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing.Tests/` (the address grammar).
|
||||
- **Integration tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.IntegrationTests/` run against the Docker Modbus simulator fixture. See [Modbus-Test-Fixture.md](Modbus-Test-Fixture.md) for the coverage map and the `MODBUS_SIM_ENDPOINT` wiring.
|
||||
- **Manual client** — [Driver.Modbus.Cli.md](../Driver.Modbus.Cli.md).
|
||||
|
||||
## Operational Notes
|
||||
|
||||
- **Wrong-endian readings are silently plausible.** A byte-order misconfiguration produces a wrong number, not a Bad quality code — surface byte-order mismatches as data-validation alerts, not status codes (see [docs/v2/driver-specs.md §2](../v2/driver-specs.md)).
|
||||
- **`WriteOnChangeOnly` + write-only tags** — the suppression cache is only invalidated by a read that returns a divergent value. A tag that is never subscribed/polled never refreshes its cache entry, so a re-asserted value can be suppressed indefinitely. Subscribe every tag that needs deterministic re-writes, or leave the option off.
|
||||
- **Auto-prohibited ranges** are visible via `GetAutoProhibitedRanges` and logged on first occurrence / on clear — use them to find protected register holes in a device's map.
|
||||
@@ -20,7 +20,8 @@ image (follow-up).
|
||||
**Integration layer** (task #215):
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/` stands up
|
||||
`mcr.microsoft.com/iotedge/opc-plc:2.14.10` via `Docker/docker-compose.yml`
|
||||
on `opc.tcp://localhost:50000`. `OpcPlcFixture` probes the port at
|
||||
on `opc.tcp://10.100.0.35:50000` (the shared Docker host; override via
|
||||
`OPCUA_SIM_ENDPOINT`). `OpcPlcFixture` probes the port at
|
||||
collection init + skips tests with a clear message when the container's
|
||||
not running (matches the Modbus/pymodbus + S7/python-snap7 skip pattern).
|
||||
Docker is the launcher — no PowerShell wrapper needed because opc-plc
|
||||
@@ -81,12 +82,15 @@ Capability surfaces whose contract is verified: `IDriver`, `ITagDiscovery`,
|
||||
|
||||
## What it does NOT cover
|
||||
|
||||
### 1. Real stack exchange
|
||||
### 1. Full real-stack exchange (unit tests only)
|
||||
|
||||
No UA Secure Channel is ever opened. Every test mocks `Session.ReadAsync`,
|
||||
`Session.CreateSubscription`, `Session.AddItem`, etc. — the SDK itself is
|
||||
trusted. Certificate validation, signing, nonce handling, chunk assembly,
|
||||
keep-alive cadence — all SDK-internal and untested here.
|
||||
The **unit** suite mocks `Session.ReadAsync`, `Session.CreateSubscription`,
|
||||
`Session.AddItem`, etc. — no UA Secure Channel is opened. The **integration**
|
||||
suite (`OpcUaClientSmokeTests`, task #215) does open a real Secure Channel
|
||||
against opc-plc and exercises Read + Subscribe end-to-end. What remains
|
||||
untested even in the integration suite: certificate validation under
|
||||
non-`None` security policies, signing/encryption, nonce handling, chunk
|
||||
assembly, keep-alive cadence — all SDK-internal.
|
||||
|
||||
### 2. Subscription transfer across reconnect
|
||||
|
||||
@@ -124,14 +128,16 @@ ConditionType events (non-base `BaseEventType`) is not verified.
|
||||
|
||||
## When to trust OpcUaClient tests, when to reach for a server
|
||||
|
||||
| Question | Unit tests | Real upstream server |
|
||||
| --- | --- | --- |
|
||||
| "Does severity 750 bucket as High?" | yes | yes |
|
||||
| "Does the driver call `TransferSubscriptions` after reconnect?" | yes | yes |
|
||||
| "Does a real OPC UA read/write round-trip work?" | no | yes (required) |
|
||||
| "Does event-filter-based alarm subscription return ConditionType events?" | no | yes (required) |
|
||||
| "Does history read from AVEVA Historian return correct aggregates?" | no | yes (required) |
|
||||
| "Does the SDK's publish queue lose notifications under load?" | no | yes (stress) |
|
||||
| Question | Unit tests | Integration (opc-plc) | Real upstream server |
|
||||
| --- | --- | --- | --- |
|
||||
| "Does severity 750 bucket as High?" | yes | - | yes |
|
||||
| "Does the driver call `TransferSubscriptions` after reconnect?" | yes | - | yes |
|
||||
| "Does a real OPC UA read round-trip work?" | no | yes | yes |
|
||||
| "Does a real OPC UA subscribe deliver changes?" | no | yes | yes |
|
||||
| "Does a write round-trip work against a live server?" | no | no (not yet exercised) | yes (required) |
|
||||
| "Does event-filter-based alarm subscription return ConditionType events?" | no | no | yes (required) |
|
||||
| "Does history read from AVEVA Historian return correct aggregates?" | no | no | yes (required) |
|
||||
| "Does the SDK's publish queue lose notifications under load?" | no | no | yes (stress) |
|
||||
|
||||
## Follow-up candidates
|
||||
|
||||
@@ -164,8 +170,17 @@ Beyond that:
|
||||
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/` — unit tests with
|
||||
mocked `Session`
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/OpcPlcFixture.cs`
|
||||
— collection fixture; parses `OPCUA_SIM_ENDPOINT` (default
|
||||
`opc.tcp://10.100.0.35:50000`), TCP-probes at collection init, records
|
||||
`SkipReason` when unreachable
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/OpcUaClientSmokeTests.cs`
|
||||
— wire-level test suite (3 `[Fact]` methods: read, batch read, subscribe)
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/Docker/docker-compose.yml`
|
||||
— `mcr.microsoft.com/iotedge/opc-plc:2.14.10` with `--ut --aa --alm --pn=50000`
|
||||
- `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs` — ctor +
|
||||
session-factory seam tests mock through
|
||||
session-factory seam that tests mock through; implements `IAlarmSource` +
|
||||
`IHistoryProvider` (unique among drivers); does NOT implement `IRediscoverable`
|
||||
- `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs` —
|
||||
the v2 dual-endpoint integration harness a future loopback client test could
|
||||
piggyback on (v1 `OpcUaServerIntegrationTests.cs` retired with the v1 server project)
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
# OPC UA Client (Gateway) Driver
|
||||
|
||||
Getting-started guide for the OPC UA Client driver. This is the short path — for
|
||||
the full per-field spec read [`docs/v2/driver-specs.md §8`](../v2/driver-specs.md),
|
||||
and for the test-harness map read [OpcUaClient-Test-Fixture.md](OpcUaClient-Test-Fixture.md).
|
||||
|
||||
## What it talks to
|
||||
|
||||
A **remote OPC UA server**. This driver runs in the *opposite* direction from the
|
||||
usual "server exposes PLC data" flow: it acts as an OPC UA **client**, opens a
|
||||
`Session` against an upstream server, and re-exposes that server's address space
|
||||
through the local OtOpcUa server. Browse, read, write, subscribe, alarm, and
|
||||
history calls are passed through to the upstream endpoint.
|
||||
|
||||
It is built on the OPC Foundation UA .NET Standard reference SDK and runs
|
||||
in-process in the OtOpcUa server's .NET 10 AnyCPU host — pure managed, no
|
||||
out-of-process isolation.
|
||||
|
||||
> There is **no standalone driver CLI** for the OPC UA Client driver. To exercise
|
||||
> a remote OPC UA endpoint by hand, point the general-purpose
|
||||
> [Client CLI](../Client.CLI.md) at it directly.
|
||||
|
||||
## Project split
|
||||
|
||||
| Project | Target | Role |
|
||||
|---------|--------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/` | net10.0 | In-process driver — session lifetime, read / write / subscribe / alarm / history passthrough |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser/` | net10.0 | `IDriverBrowser` — live address-picker browse used by the AdminUI |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Contracts/` | net10.0 | Config records + enums bound from `DriverConfig` JSON |
|
||||
|
||||
## Minimum deployment
|
||||
|
||||
```jsonc
|
||||
"Drivers": {
|
||||
"upstream-1": {
|
||||
"Type": "OpcUaClient",
|
||||
"Config": {
|
||||
"EndpointUrl": "opc.tcp://plc.internal:4840",
|
||||
"SecurityPolicy": "None",
|
||||
"SecurityMode": "None",
|
||||
"AuthType": "Anonymous",
|
||||
"TargetNamespaceKind": "Equipment"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`EndpointUrls` (a list) takes precedence over the single-URL `EndpointUrl` and
|
||||
provides ordered **failover** — the driver tries each candidate in turn at init
|
||||
and on session drop, and the first to connect wins (e.g. a hot-standby pair on
|
||||
4840 / 4841). See
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Contracts/OpcUaClientDriverOptions.cs`
|
||||
for every field (security policy/mode, auth type, session timeout, keep-alive,
|
||||
reconnect period, browse root, node/depth caps).
|
||||
|
||||
### Session lifetime
|
||||
|
||||
A single `Session` per driver instance; subscriptions multiplex onto it. The
|
||||
SDK reconnect handler takes the session down and brings it back on remote-server
|
||||
restart, re-sending subscriptions on reconnect so monitored-item handles don't
|
||||
dangle. Stored NodeIds embed the server-stable namespace **URI** (not the
|
||||
session-relative `ns=N` index) so a remote namespace-table reorder across a
|
||||
restart doesn't silently re-point references at the wrong namespace.
|
||||
|
||||
### Namespace assignment
|
||||
|
||||
This is the only driver that gateways into **either** namespace kind, decided
|
||||
per instance via `TargetNamespaceKind`:
|
||||
|
||||
- `Equipment` — the remote server exposes raw equipment data; remote browse
|
||||
paths are remapped to UNS via a required `UnsMappingTable`.
|
||||
- `SystemPlatform` — the remote server exposes processed/derived data; the
|
||||
remote hierarchy is preserved with no UNS conversion (and the mapping table
|
||||
must be empty).
|
||||
|
||||
The choice is enforced at startup so a misconfiguration fails draft validation
|
||||
rather than surfacing as a runtime surprise.
|
||||
|
||||
## Capability surface
|
||||
|
||||
`OpcUaClientDriver : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IAlarmSource, IHistoryProvider`
|
||||
(`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs:31`).
|
||||
|
||||
| Capability | Path | Notes |
|
||||
|------------|------|-------|
|
||||
| `ITagDiscovery` | `DiscoverAsync` (recursive browse) | Mirrors the upstream tree from `BrowseRoot` (default `ObjectsFolder` i=85), bounded by `MaxDiscoveredNodes` / `MaxBrowseDepth` |
|
||||
| `IReadable` | `ReadAsync` → `Session.ReadAsync` | Upstream `StatusCode`s pass through verbatim (cascading-quality rule) |
|
||||
| `IWritable` | `WriteAsync` → `Session.WriteAsync` | Passthrough write |
|
||||
| `ISubscribable` | native OPC UA subscriptions / monitored items | The remote server pushes data changes |
|
||||
| `IHostConnectivityProbe` | session keep-alive | Host key is the endpoint URL actually connected to after the failover sweep |
|
||||
| `IAlarmSource` | `SubscribeAlarmsAsync` (EventFilter) + `AcknowledgeAsync` | Subscribes to upstream alarm/condition events and forwards acks |
|
||||
| `IHistoryProvider` | `ReadRawAsync` / `ReadProcessedAsync` / `ReadAtTimeAsync` → `Session.HistoryReadAsync` | **Unique to this driver** — passthrough history read against the upstream server |
|
||||
|
||||
> This driver does **not** implement `IRediscoverable` — there is no
|
||||
> push-driven rediscovery signal from a remote OPC UA server in this driver.
|
||||
> `IHistoryProvider` is implemented by no other driver; history reads for every
|
||||
> other source route server-side through `IHistoryRouter`.
|
||||
|
||||
### History passthrough
|
||||
|
||||
`IHistoryProvider` forwards `HistoryRead` to the upstream server's own historian.
|
||||
Raw, processed (Average / Minimum / Maximum / Total / Count aggregates mapped to
|
||||
OPC UA Part 13 standard aggregate NodeIds), and at-time reads are supported; each
|
||||
returned `DataValue` keeps its upstream `StatusCode` and timestamps verbatim.
|
||||
Event-history (`ReadEventsAsync`) is left at the interface default — the
|
||||
interface doesn't yet carry the EventFilter surface needed to forward it.
|
||||
|
||||
### Certificate trust
|
||||
|
||||
`AutoAcceptCertificates` accepts any self-signed / untrusted server certificate.
|
||||
It is **dev-only** — leave it `false` in production so a MITM against the
|
||||
opc.tcp channel fails closed.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/`
|
||||
cover the session lifecycle, namespace remapping, alarm/history passthrough,
|
||||
and config binding against a faked SDK session.
|
||||
- **Integration fixture** — exercises the driver against a reference OPC UA
|
||||
server (opc-plc) on the shared Docker host; see
|
||||
[OpcUaClient-Test-Fixture.md](OpcUaClient-Test-Fixture.md) for the coverage map.
|
||||
|
||||
## Further reading
|
||||
|
||||
- [`docs/v2/driver-specs.md §8`](../v2/driver-specs.md) — full per-field spec,
|
||||
namespace-assignment rules, and cascading-quality detail
|
||||
- [OpcUaClient-Test-Fixture.md](OpcUaClient-Test-Fixture.md) — test-harness map
|
||||
- [Client.CLI.md](../Client.CLI.md) — general-purpose OPC UA client CLI for
|
||||
ad-hoc browsing of any endpoint
|
||||
+32
-19
@@ -9,8 +9,9 @@ OtOpcUa is a multi-driver OPC UA server. The Core (`ZB.MOM.WW.OtOpcUa.Core` + `C
|
||||
- `IHostConnectivityProbe` — per-host reachability events
|
||||
- `IPerCallHostResolver` — multi-host drivers that route each call to a target endpoint at dispatch time
|
||||
- `IAlarmSource` — driver-emitted OPC UA A&C events
|
||||
- `IHistoryProvider` — raw / processed / at-time / events HistoryRead (see [HistoricalDataAccess.md](../HistoricalDataAccess.md))
|
||||
- `IHistoryProvider` — driver-side raw / processed / at-time / events HistoryRead (see [HistoricalDataAccess.md](../v1/HistoricalDataAccess.md))
|
||||
- `IRediscoverable` — driver-initiated address-space rebuild notifications
|
||||
- `IHistorianDataSource` — server-side historian sink registration (the Wonderware Historian backend), distinct from the driver-side `IHistoryProvider` HistoryRead path
|
||||
|
||||
Each driver opts into only the capabilities it supports. Every async capability call at the Server dispatch layer goes through `CapabilityInvoker` (`Core/Resilience/CapabilityInvoker.cs`), which wraps it in a Polly pipeline keyed on `(DriverInstanceId, HostName, DriverCapability)`. The `OTOPCUA0001` analyzer enforces the wrap at build time. Drivers themselves never depend on Polly; they just implement the capability interface and let the Core wrap it.
|
||||
|
||||
@@ -20,25 +21,37 @@ Driver type metadata is registered at startup in `DriverTypeRegistry` (`src/Core
|
||||
|
||||
| Driver | Project path | Tier | Wire / library | Capabilities | Notable quirk |
|
||||
|--------|--------------|:----:|----------------|--------------|---------------|
|
||||
| [Galaxy](Galaxy.md) | `Driver.Galaxy.{Shared, Host, Proxy}` | C | MXAccess COM + `aahClientManaged` + SqlClient | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IAlarmSource, IHistoryProvider, IRediscoverable, IHostConnectivityProbe | Out-of-process — Host is its own Windows service (.NET 4.8 x86 for the COM bitness constraint); Proxy talks to Host over a named pipe |
|
||||
| Modbus TCP | `Driver.Modbus` | A | NModbus-derived in-house client | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe | Polled subscriptions via the shared `PollGroupEngine`. DL205 PLCs are covered by `AddressFormat=DL205` (octal V/X/Y/C/T/CT translation) — no separate driver |
|
||||
| Siemens S7 | `Driver.S7` | A | [S7netplus](https://github.com/S7NetPlus/s7netplus) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe | Single S7netplus `Plc` instance per PLC serialized with `SemaphoreSlim` — the S7 CPU's comm mailbox is scanned at most once per cycle, so parallel reads don't help |
|
||||
| AB CIP | `Driver.AbCip` | A | libplctag CIP | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource | ControlLogix / CompactLogix. Tag discovery uses the `@tags` walker to enumerate controller-scoped + program-scoped symbols; UDT member resolution via the UDT template reader |
|
||||
| AB Legacy | `Driver.AbLegacy` | A | libplctag PCCC | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver | SLC 500 / MicroLogix. File-based addressing (`N7:0`, `F8:0`) — no symbol table, tag list is user-authored in the config DB |
|
||||
| TwinCAT | `Driver.TwinCAT` | B | Beckhoff `TwinCAT.Ads` (`TcAdsClient`) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver | The only native-notification driver outside Galaxy — ADS delivers `ValueChangedCallback` events the driver forwards straight to `ISubscribable.OnDataChange` without polling. Symbol tree uploaded via `SymbolLoaderFactory` |
|
||||
| [FOCAS](FOCAS.md) | `Driver.FOCAS` | A | Pure-managed `FocasWireClient` — FOCAS/2 Ethernet binary protocol on TCP:8193, inlined into the driver assembly | IDriver, ITagDiscovery, IReadable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource | Read-only by design (WriteAsync returns `BadNotWritable`). CNC-shaped data model (axes, spindle, PMC, macros, alarms) not a flat tag map. Previously Tier-C (Host + P/Invoke + shim DLL); retired in the 2026-04-24 migration when the managed wire client landed |
|
||||
| OPC UA Client | `Driver.OpcUaClient` | B | OPCFoundation `Opc.Ua.Client` | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IAlarmSource, IHistoryProvider, IHostConnectivityProbe | Gateway/aggregation driver. Opens a single `Session` against a remote OPC UA server and re-exposes its address space. Owns its own `ApplicationConfiguration` (distinct from `Client.Shared`) because it's always-on with keep-alive + `TransferSubscriptions` across SDK reconnect, not an interactive CLI |
|
||||
| [Galaxy](Galaxy.md) | `Driver.Galaxy` (+ `.Browser`, `.Contracts`) | A | gRPC to the external `mxaccessgw` gateway (the gateway owns MXAccess COM + the Galaxy Repository SQL reader) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IAlarmSource, IRediscoverable, IHostConnectivityProbe | In-process .NET 10 driver — the COM bitness constraint lives in the gateway's x86 net48 worker, not here. PR 7.2 retired the legacy in-process `Galaxy.{Shared, Host, Proxy}` + named-pipe Windows service. Native MxAccess alarms work end-to-end |
|
||||
| [Modbus TCP](Modbus.md) | `Driver.Modbus` | A | NModbus-derived in-house client | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver | Polled subscriptions via the shared `PollGroupEngine`. DL205 PLCs are covered by `AddressFormat=DL205` (octal V/X/Y/C/T/CT translation) — no separate driver |
|
||||
| [Siemens S7](S7.md) | `Driver.S7` | A | [S7netplus](https://github.com/S7NetPlus/s7netplus) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe | Single S7netplus `Plc` instance per PLC serialized with `SemaphoreSlim` — the S7 CPU's comm mailbox is scanned at most once per cycle, so parallel reads don't help |
|
||||
| [AB CIP](AbCip.md) | `Driver.AbCip` | A | libplctag CIP | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource | ControlLogix / CompactLogix. Tag discovery uses the `@tags` walker to enumerate controller-scoped + program-scoped symbols; UDT member resolution via the UDT template reader |
|
||||
| [AB Legacy](AbLegacy.md) | `Driver.AbLegacy` | A | libplctag PCCC | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver | SLC 500 / MicroLogix. File-based addressing (`N7:0`, `F8:0`) — no symbol table, tag list is user-authored in the config DB |
|
||||
| [TwinCAT](TwinCAT.md) | `Driver.TwinCAT` | B | Beckhoff `TwinCAT.Ads` (`TcAdsClient`) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IRediscoverable | The only native-notification driver outside Galaxy — ADS delivers `ValueChangedCallback` events the driver forwards straight to `ISubscribable.OnDataChange` without polling. Symbol tree uploaded via `SymbolLoaderFactory` |
|
||||
| [FOCAS](FOCAS.md) | `Driver.FOCAS` | A | Pure-managed `FocasWireClient` — FOCAS/2 Ethernet binary protocol on TCP:8193, inlined into the driver assembly | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource | `IWritable` is implemented but read-only by design — `WriteAsync` returns `BadNotWritable` for every point. CNC-shaped data model (axes, spindle, PMC, macros, alarms), not a flat tag map. Previously Tier-C (Host + P/Invoke + shim DLL); retired in the 2026-04-24 migration when the managed wire client landed |
|
||||
| [OPC UA Client](OpcUaClient.md) | `Driver.OpcUaClient` | B | OPCFoundation `Opc.Ua.Client` | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IAlarmSource, IHistoryProvider, IHostConnectivityProbe | Gateway/aggregation driver — the only driver implementing driver-side `IHistoryProvider` (forwards HistoryRead to the upstream server). Opens a single `Session` against a remote OPC UA server and re-exposes its address space. Owns its own `ApplicationConfiguration` (distinct from `Client.Shared`) because it's always-on with keep-alive + `TransferSubscriptions` across SDK reconnect, not an interactive CLI |
|
||||
| [Historian.Wonderware](Historian.Wonderware.md) | `Driver.Historian.Wonderware` (+ `.Client`, `.Client.Contracts`) | — | `aahClientManaged` write SDK + AVEVA Historian SQL, over a pipe IPC backend | IHistorianDataSource (server-side historian sink) | Not a tag driver — a historian backend that registers `IHistorianDataSource` (`HistorianDataSource : IHistorianDataSource`) to satisfy HistoryRead and to sink tag/alarm history. No `IDriver`/`ITagDiscovery` surface |
|
||||
|
||||
## Per-driver documentation
|
||||
|
||||
- **Galaxy** has its own docs in this folder because the out-of-process architecture + MXAccess COM rules + Galaxy Repository SQL + Historian + runtime probe manager don't fit a single table row:
|
||||
- [Galaxy.md](Galaxy.md) — COM bridge, STA pump, IPC, runtime probes
|
||||
- [Galaxy-Repository.md](Galaxy-Repository.md) — ZB SQL reader, `LocalPlatform` scope filter, change detection
|
||||
- **Galaxy** has its own docs in this folder because the gRPC-to-gateway architecture + MXAccess rules (owned by the gateway) + Galaxy Repository SQL + Historian + runtime probe manager don't fit a single table row:
|
||||
- [Galaxy.md](Galaxy.md) — gateway gRPC bridge, hierarchy source, runtime probes
|
||||
- [Galaxy-Repository.md](../v1/drivers/Galaxy-Repository.md) — ZB SQL reader, `LocalPlatform` scope filter, change detection (v1 archive)
|
||||
|
||||
- **FOCAS** has a short getting-started doc because the Tier-C two-project deployment + backend-selection env var + alarm projection opt-in all need explaining up front:
|
||||
- **FOCAS** has a short getting-started doc because the backend-selection env var + alarm projection opt-in need explaining up front:
|
||||
- [FOCAS.md](FOCAS.md) — deployment, config, capability surface, alarm projection, troubleshooting
|
||||
|
||||
- **All other drivers** share a single per-driver specification in [docs/v2/driver-specs.md](../v2/driver-specs.md) — addressing, data-type maps, connection settings, and quirks live there. That file is the authoritative per-driver reference; this index points at it rather than duplicating.
|
||||
- **Modbus TCP**, **AB CIP**, **AB Legacy**, **Siemens S7**, **TwinCAT**, and **OPC UA Client** each have a per-driver overview page:
|
||||
- [Modbus.md](Modbus.md) — in-process Modbus-TCP driver: address formats, polled subscription model, DL205 octal mapping
|
||||
- [AbCip.md](AbCip.md) — AB CIP / EtherNet-IP driver (ControlLogix / CompactLogix / Micro800 / GuardLogix): tag discovery, UDT resolution, alarm source
|
||||
- [AbLegacy.md](AbLegacy.md) — AB Legacy PCCC driver (SLC 500 / MicroLogix / PLC-5): file-based addressing, user-authored tag list
|
||||
- [S7.md](S7.md) — Siemens S7 driver (S7-300/400/1200/1500 + S7-200): getting started, config, data-block addressing, serialized single-connection model
|
||||
- [TwinCAT.md](TwinCAT.md) — Beckhoff TwinCAT (ADS) driver: getting started, native-notification subscription, symbol-tree upload
|
||||
- [OpcUaClient.md](OpcUaClient.md) — OPC UA Client (gateway/aggregation) driver: remote-server session, driver-side HistoryRead forwarding, reconnect behaviour
|
||||
|
||||
- **Historian.Wonderware** (server-side historian sink, not a tag driver) has its own overview page:
|
||||
- [Historian.Wonderware.md](Historian.Wonderware.md) — AVEVA Historian backend: sink registration, HistoryRead dispatch, alarm store-and-forward, deployment prerequisites
|
||||
|
||||
- The full per-field spec (capability surface, config schema, addressing, data-type maps, connection settings, quirks for every driver) lives in [docs/v2/driver-specs.md](../v2/driver-specs.md). The overview pages above are the short path; that file is the authoritative per-driver reference.
|
||||
|
||||
## Test-fixture coverage maps
|
||||
|
||||
@@ -50,13 +63,13 @@ Each driver has a dedicated fixture doc that lays out what the integration / uni
|
||||
- [AB Legacy](AbLegacy-Test-Fixture.md) — Dockerized `ab_server` PCCC mode across SLC500 / MicroLogix / PLC-5 profiles (task #224); N/F/L-file round-trip verified end-to-end. `/1,0` cip-path required for the Docker fixture; real hardware uses empty. Residual gap: bit-file writes (`B3:0/5`) still surface BadState — real HW / RSEmulate 500 for those
|
||||
- [TwinCAT](TwinCAT-Test-Fixture.md) — XAR-VM integration scaffolding (task #221); three smoke tests skip when VM unreachable. Unit via `FakeTwinCATClient` with native-notification harness
|
||||
- [FOCAS](FOCAS-Test-Fixture.md) — no integration fixture, unit-only via `FakeFocasClient`; Tier C out-of-process isolation scoped but not shipped
|
||||
- [OPC UA Client](OpcUaClient-Test-Fixture.md) — no integration fixture, unit-only via mocked `Session`; loopback against this repo's own server is the obvious next step
|
||||
- [Galaxy](Galaxy-Test-Fixture.md) — richest harness: E2E Host subprocess + ZB SQL live-smoke + MXAccess opt-in
|
||||
- [OPC UA Client](OpcUaClient-Test-Fixture.md) — Dockerized `opc-plc` integration suite (task #215): real Secure Channel + Session, read + subscribe verified end-to-end; write not yet exercised in the integration suite; exhaustive capability matrix (reconnect, failover, cert-auth, history, alarms) via unit suite with mocked `Session`
|
||||
- [Galaxy](../v1/drivers/Galaxy-Test-Fixture.md) — richest harness: gateway E2E + ZB SQL live-smoke + MXAccess opt-in (v1 archive)
|
||||
|
||||
## Related cross-driver docs
|
||||
|
||||
- [HistoricalDataAccess.md](../HistoricalDataAccess.md) — `IHistoryProvider` dispatch, aggregate mapping, continuation points. The Galaxy driver's Aveva Historian implementation is the first; OPC UA Client forwards to the upstream server; other drivers do not implement the interface and return `BadHistoryOperationUnsupported`.
|
||||
- [AlarmTracking.md](../AlarmTracking.md) — `IAlarmSource` event model and filtering.
|
||||
- [Subscriptions.md](../Subscriptions.md) — how the Server multiplexes subscriptions onto `ISubscribable.OnDataChange`.
|
||||
- [HistoricalDataAccess.md](../v1/HistoricalDataAccess.md) — `IHistoryProvider` dispatch, aggregate mapping, continuation points. The OPC UA Client driver is the only driver that implements driver-side `IHistoryProvider` (it forwards HistoryRead to the upstream server); the AVEVA Historian path is served server-side by the Wonderware `IHistorianDataSource` sink instead. Other drivers do not implement the interface and return `BadHistoryOperationUnsupported`.
|
||||
- [AlarmTracking.md](../AlarmTracking.md) — `IAlarmSource` event model and filtering. Implemented by Galaxy (native MxAccess alarms, working end-to-end), OPC UA Client, AB CIP, and FOCAS; AB Legacy, Modbus, S7, and TwinCAT have no alarm source.
|
||||
- [Subscriptions.md](../v1/Subscriptions.md) — how the Server multiplexes subscriptions onto `ISubscribable.OnDataChange`.
|
||||
- [docs/v2/driver-stability.md](../v2/driver-stability.md) — tier system (A / B / C), shared `CapabilityPolicy` defaults per tier × capability, `MemoryTracking` hybrid formula, and process-level recycle rules.
|
||||
- [docs/v2/plan.md](../v2/plan.md) — authoritative vision, architecture decisions, migration strategy.
|
||||
|
||||
@@ -6,17 +6,19 @@ Coverage map + gap inventory for the S7 driver.
|
||||
[python-snap7](https://github.com/gijzelaerr/python-snap7)'s `Server` class
|
||||
(task #216). Atomic reads (u16 / i16 / i32 / f32 / bool-with-bit) + DB
|
||||
write-then-read round-trip are exercised end-to-end through S7netplus +
|
||||
real ISO-on-TCP on `localhost:1102`. Unit tests still carry everything
|
||||
else (address parsing, error-branch handling, probe-loop contract). Gaps
|
||||
remaining are variant-quirk-shaped: Optimized-DB symbolic access, PG/OP
|
||||
session types, PUT/GET-disabled enforcement — all need real hardware.
|
||||
real ISO-on-TCP on `10.100.0.35:1102` (the shared Docker host; override via
|
||||
`S7_SIM_ENDPOINT`). Unit tests still carry everything else (address parsing,
|
||||
error-branch handling, probe-loop contract). Gaps remaining are
|
||||
variant-quirk-shaped: Optimized-DB symbolic access, PG/OP session types,
|
||||
PUT/GET-disabled enforcement — all need real hardware.
|
||||
|
||||
## What the fixture is
|
||||
|
||||
**Integration layer** (task #216):
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/` stands up a
|
||||
python-snap7 `Server` via `Docker/docker-compose.yml --profile s7_1500`
|
||||
on `localhost:1102` (pinned `python:3.12-slim-bookworm` base +
|
||||
on `10.100.0.35:1102` (the shared Docker host; override via `S7_SIM_ENDPOINT`;
|
||||
pinned `python:3.12-slim-bookworm` base +
|
||||
`python-snap7>=2.0`). Docker is the only supported launch path.
|
||||
`Snap7ServerFixture` probes the port at collection init + skips with a
|
||||
clear message when unreachable (matches the pymodbus pattern).
|
||||
@@ -60,18 +62,20 @@ Wire-level surfaces verified: `IReadable`, `IWritable`.
|
||||
|
||||
## What it does NOT cover
|
||||
|
||||
### 1. Wire-level anything
|
||||
### 1. Wire-level anything (unit tests only)
|
||||
|
||||
No ISO-on-TCP frame is ever sent during the test suite. S7netplus is the only
|
||||
wire-path abstraction and it has no in-process fake mode; the shipping choice
|
||||
was to contract-test via `IS7Client` rather than patch into S7netplus
|
||||
internals.
|
||||
The **unit** suite (`S7DriverReadWriteTests`, etc.) sends no real ISO-on-TCP
|
||||
frames. S7netplus has no in-process fake mode; the unit tests contract-test via the
|
||||
`IS7Client` abstraction. The **integration** suite (`S7_1500SmokeTests`, task
|
||||
#216) does send real S7comm over ISO-on-TCP against the python-snap7 container
|
||||
and covers the basic read / write / typed-batch path.
|
||||
|
||||
### 2. Read/write happy path
|
||||
### 2. Error-branch unit tests vs. real round-trips
|
||||
|
||||
Every `S7DriverReadWriteTests` case exercises error branches. A successful
|
||||
read returning real PLC data is not tested end-to-end — the return value is
|
||||
whatever the fake says it is.
|
||||
`S7DriverReadWriteTests` (unit) exercises error paths only; return values come
|
||||
from the fake. The integration suite exercises the successful read / write
|
||||
round-trip, but only against the python-snap7 emulator — not a real Siemens
|
||||
CPU.
|
||||
|
||||
### 3. Mailbox serialization under concurrent reads
|
||||
|
||||
@@ -91,31 +95,40 @@ arrays of structs — not covered.
|
||||
|
||||
## When to trust the S7 tests, when to reach for a rig
|
||||
|
||||
| Question | Unit tests | Real PLC |
|
||||
| --- | --- | --- |
|
||||
| "Does the address parser accept X syntax?" | yes | - |
|
||||
| "Does the driver lifecycle hang / crash?" | yes | yes |
|
||||
| "Does a real read against an S7-1500 return correct bytes?" | no | yes (required) |
|
||||
| "Does mailbox serialization actually prevent PG timeouts?" | no | yes (required) |
|
||||
| "Does a UDT fan-out produce usable member variables?" | no | yes (required) |
|
||||
| Question | Unit tests | Integration (python-snap7) | Real PLC |
|
||||
| --- | --- | --- | --- |
|
||||
| "Does the address parser accept X syntax?" | yes | - | - |
|
||||
| "Does the driver lifecycle hang / crash?" | yes | yes | yes |
|
||||
| "Does a real read against an S7-1500 return correct bytes?" | no | yes (basic scalars) | yes (required for full type matrix) |
|
||||
| "Does mailbox serialization actually prevent PG timeouts?" | no | no | yes (required) |
|
||||
| "Does a UDT fan-out produce usable member variables?" | no | no | yes (required) |
|
||||
|
||||
## Follow-up candidates
|
||||
|
||||
1. **Snap7 server** — [Snap7](https://snap7.sourceforge.net/) ships a
|
||||
C-library-based S7 server that could run in-CI on Linux. A pinned build +
|
||||
a fixture shape similar to `ab_server` would give S7 parity with Modbus /
|
||||
AB CIP coverage.
|
||||
2. **Plcsim Advanced** — Siemens' paid emulator. Licensed per-seat; fits a
|
||||
lab rig but not CI.
|
||||
3. **Real S7 lab rig** — cheapest physical PLC (CPU 1212C) on a dedicated
|
||||
network port, wired via self-hosted runner.
|
||||
The python-snap7 fixture (task #216) covers scalar read / write / typed-batch.
|
||||
Remaining gaps need one of:
|
||||
|
||||
Without any of these, S7 driver correctness against real hardware is trusted
|
||||
1. **PLCSIM Advanced** — Siemens' paid emulator; gives Optimized-DB symbolic
|
||||
access + PG/OP/S7-Basic session differentiation without real hardware.
|
||||
Licensed per-seat; fits a lab rig but not CI.
|
||||
2. **Real S7 lab rig** — cheapest physical PLC (CPU 1212C) on a dedicated
|
||||
network port, wired via self-hosted runner. Only path for mailbox
|
||||
serialization / PUT-GET enforcement verification.
|
||||
|
||||
Without either, S7 driver correctness for variant-quirk edge cases is trusted
|
||||
from field deployments, not from the test suite.
|
||||
|
||||
## Key fixture / config files
|
||||
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Tests/` — unit tests only, no harness
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/Snap7ServerFixture.cs`
|
||||
— collection fixture; parses `S7_SIM_ENDPOINT` (default `10.100.0.35:1102`),
|
||||
TCP-probes at collection init, records `SkipReason` when unreachable
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/S7_1500/S7_1500SmokeTests.cs`
|
||||
— wire-level test suite (3 `[Fact]` methods: u16 read, typed batch, write-then-read)
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/Docker/docker-compose.yml`
|
||||
— one service per profile (`s7_1500`); binds `1102:1102` on the Docker host
|
||||
- `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.IntegrationTests/Docker/profiles/s7_1500.json`
|
||||
— DB1 + MB seed layout with typed seeds at known offsets
|
||||
- `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/S7Driver.cs` — ctor takes
|
||||
`IS7ClientFactory` which tests fake; docstring lines 8-20 note the deferred
|
||||
integration fixture
|
||||
`IS7ClientFactory` which tests fake
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
# Siemens S7 Driver
|
||||
|
||||
Getting-started guide for the Siemens S7 driver. This is the short path — for
|
||||
the full per-field spec read [`docs/v2/driver-specs.md §5`](../v2/driver-specs.md),
|
||||
for hands-on CLI testing read [Driver.S7.Cli.md](../Driver.S7.Cli.md), and for
|
||||
the test-harness map read [S7-Test-Fixture.md](S7-Test-Fixture.md).
|
||||
|
||||
## What it talks to
|
||||
|
||||
Siemens S7 PLCs — S7-300, S7-400, S7-1200, S7-1500, plus S7-200 / S7-200 Smart
|
||||
/ LOGO! 0BA8 — over the native **S7comm** protocol on **ISO-on-TCP, TCP port
|
||||
102**. The wire is spoken by the pure-managed [S7netplus](https://github.com/S7NetPlus/s7netplus)
|
||||
(`S7.Net`) library: no native DLL, no P/Invoke, no out-of-process isolation. The
|
||||
driver runs in-process in the OtOpcUa server's .NET 10 AnyCPU host on every OS
|
||||
the server runs on.
|
||||
|
||||
This is the **leanest** OtOpcUa driver — read/write/subscribe/discover plus a
|
||||
connectivity probe, and nothing else. It implements no alarm source and no
|
||||
per-call host resolver (a single S7 instance targets a single CPU).
|
||||
|
||||
## Project split
|
||||
|
||||
| Project | Target | Role |
|
||||
|---------|--------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/` | net10.0 | In-process driver — hosts the `S7.Net.Plc` connection and the address parser |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Contracts/` | net10.0 | Dependency-free config records + enums (`S7DriverOptions`, `S7CpuType`, `S7DataType`) bound from `DriverConfig` JSON |
|
||||
|
||||
## Minimum deployment
|
||||
|
||||
Register the driver instance in the central config DB (or `appsettings.json`).
|
||||
No separate service, no DLL deployment:
|
||||
|
||||
```jsonc
|
||||
"Drivers": {
|
||||
"s7-line-1": {
|
||||
"Type": "S7",
|
||||
"Config": {
|
||||
"Host": "10.20.30.40",
|
||||
"CpuType": "S71500",
|
||||
"Rack": 0,
|
||||
"Slot": 0,
|
||||
"Tags": [
|
||||
{ "Name": "Running", "Address": "DB1.DBX0.0", "DataType": "Bool", "Writable": false },
|
||||
{ "Name": "Speed", "Address": "DB1.DBD4", "DataType": "Float32", "Writable": true }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
S7 exposes a symbol table, but `S7.Net` does not surface it — so the driver
|
||||
operates off a **static, per-site tag list**, not live symbol discovery.
|
||||
|
||||
### Rack / slot / CPU family
|
||||
|
||||
`CpuType` selects the ISO-TSAP slot byte used during the connection handshake;
|
||||
pick the family that matches the PLC exactly. `Rack` is almost always `0`
|
||||
(relevant only for distributed S7-400 racks). `Slot` conventions per family:
|
||||
S7-300 = slot 2, S7-400 = slot 2 or 3, S7-1200 / S7-1500 = slot 0 (onboard PN).
|
||||
A wrong slot causes a connection refusal during the handshake. See
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Contracts/S7DriverOptions.cs` for the
|
||||
per-field defaults.
|
||||
|
||||
## Address forms
|
||||
|
||||
Addresses use Siemens TIA-Portal / STEP 7 Classic syntax, parsed by
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/S7AddressParser.cs`:
|
||||
|
||||
| Area | Example | Meaning |
|
||||
|------|---------|---------|
|
||||
| Data block | `DB1.DBX0.0` / `DB1.DBW0` / `DB1.DBD4` | DB number + size suffix `X`(bit) / `B`(byte) / `W`(word) / `D`(dword), optional `.bit` for `DBX` |
|
||||
| Merker (M) | `MB0` / `MW0` / `MD4` / `M0.0` | Marker (flag) memory; size letter `B`/`W`/`D` before the offset, or bare `byte.bit` for bit access |
|
||||
| Input (I) | `IB0` / `IW0` / `I0.0` | Process-image input |
|
||||
| Output (Q) | `QB0` / `QW0` / `Q0.0` | Process-image output |
|
||||
|
||||
Parsing is strict and runs once at `InitializeAsync` so a config typo fails fast
|
||||
at load instead of surfacing as `BadInternalError` on every read. Bit offsets
|
||||
must be 0-7, byte offsets non-negative, DB numbers >= 1.
|
||||
|
||||
> **Timer (`T{n}`) and Counter (`C{n}`)** addresses parse cleanly but the read
|
||||
> path has no decode case for them yet — the driver rejects them at init with an
|
||||
> explicit error rather than letting them surface a misleading type-mismatch.
|
||||
|
||||
## Data types
|
||||
|
||||
`S7DataType` declares the **semantic** type; `S7.Net` returns an unsigned boxed
|
||||
value (bool / byte / ushort / uint) that the driver reinterprets without an
|
||||
extra PLC round-trip. Wired through today: `Bool`, `Byte`, `Int16`, `UInt16`,
|
||||
`Int32`, `UInt32`, `Float32`. `Int64`, `UInt64`, `Float64`, `String`, and
|
||||
`DateTime` are declared in the enum but **rejected at init** — half-implemented
|
||||
types must not create OPC UA nodes that then return `BadNotSupported` on every
|
||||
access.
|
||||
|
||||
## Capability surface
|
||||
|
||||
`S7Driver : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe`
|
||||
(`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/S7Driver.cs`).
|
||||
|
||||
| Capability | Path | Notes |
|
||||
|------------|------|-------|
|
||||
| `IReadable` | `ReadAsync` → `S7.Net.Plc.ReadAsync` | One request/response per tag, serialized on a per-PLC semaphore |
|
||||
| `IWritable` | `WriteAsync` → `S7.Net.Plc.WriteAsync` | Read-only tags (`Writable=false`) return `BadNotWritable` |
|
||||
| `ITagDiscovery` | `DiscoverAsync` | Emits a flat `S7/` folder of the configured tags — no live browse |
|
||||
| `ISubscribable` | per-tag poll loop with capped exponential backoff | S7 has no push model; the poll-interval floor is 100 ms (the CPU services the comms mailbox once per scan) |
|
||||
| `IHostConnectivityProbe` | periodic `S7.Net.Plc.ReadStatusAsync` (CPU-status PDU) | `host:port` host key; `Running`/`Stopped` transitions raise `OnHostStatusChanged` |
|
||||
|
||||
### Single-connection policy
|
||||
|
||||
One `S7.Net.Plc` instance per PLC, serialized with a `SemaphoreSlim`.
|
||||
Parallelising reads against a single CPU doesn't help — the CPU scans its
|
||||
comms mailbox at most once per cycle and queues concurrent requests wire-side
|
||||
anyway, and extra connections just use up the CPU's 8-64 connection-resource budget.
|
||||
|
||||
## PUT/GET communication
|
||||
|
||||
S7-1200 / S7-1500 ship with **PUT/GET access disabled** by default. A driver
|
||||
pointed at a freshly-flashed CPU sees a hard access-denied fault. The driver
|
||||
maps it specifically to `BadNotSupported`, flags the instance `Faulted` (a
|
||||
configuration alert, not a transient fault), and does **not** blind-retry —
|
||||
because the CPU will keep refusing. Fix: enable PUT/GET communication in TIA
|
||||
Portal under the CPU's *Protection & Security → Connection mechanisms* properties ("Permit access with PUT/GET communication from remote partner").
|
||||
|
||||
## Error mapping
|
||||
|
||||
| Condition | StatusCode | Health |
|
||||
|-----------|------------|--------|
|
||||
| Tag not in config | `BadNodeIdUnknown` | unchanged |
|
||||
| Read-only tag written | `BadNotWritable` | unchanged |
|
||||
| Unimplemented data type | `BadNotSupported` | unchanged |
|
||||
| PUT/GET denied | `BadNotSupported` | `Faulted` (config alert) |
|
||||
| CPU / hardware fault | `BadDeviceFailure` | `Degraded` |
|
||||
| Socket / timeout | `BadCommunicationError` | `Degraded` |
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Tests/` cover the
|
||||
address parser, the reinterpret/box conversions, and the driver lifecycle.
|
||||
- **Integration fixture** — a Docker S7 simulator on the shared Docker host; see
|
||||
[S7-Test-Fixture.md](S7-Test-Fixture.md) for the coverage map and endpoint.
|
||||
- **CLI** — [Driver.S7.Cli.md](../Driver.S7.Cli.md) documents the standalone
|
||||
read/write/probe CLI for manual checks against a real or simulated CPU.
|
||||
|
||||
## Further reading
|
||||
|
||||
- [`docs/v2/driver-specs.md §5`](../v2/driver-specs.md) — full per-field spec,
|
||||
DriverConfig JSON shape, and operational stability notes
|
||||
- [Driver.S7.Cli.md](../Driver.S7.Cli.md) — standalone S7 driver CLI
|
||||
- [S7-Test-Fixture.md](S7-Test-Fixture.md) — simulator + test-harness map
|
||||
@@ -0,0 +1,129 @@
|
||||
# Beckhoff TwinCAT (ADS) Driver
|
||||
|
||||
Getting-started guide for the Beckhoff TwinCAT driver. This is the short path —
|
||||
for the full per-field spec read [`docs/v2/driver-specs.md §6`](../v2/driver-specs.md),
|
||||
for hands-on CLI testing read [Driver.TwinCAT.Cli.md](../Driver.TwinCAT.Cli.md),
|
||||
and for the test-harness map read [TwinCAT-Test-Fixture.md](TwinCAT-Test-Fixture.md).
|
||||
|
||||
## What it talks to
|
||||
|
||||
Beckhoff PLC runtimes — **TwinCAT 2 and TwinCAT 3** — over the Beckhoff **ADS**
|
||||
protocol carried by **AMS** routing. The driver runs in-process in the OtOpcUa
|
||||
server's .NET 10 AnyCPU host. It compiles and runs without a local AMS router,
|
||||
but every wire call returns `BadCommunicationError` until a router is reachable
|
||||
(the router translates an AMS Net ID to an IP route).
|
||||
|
||||
Addressing is **symbol-based**: tags are referenced by their TwinCAT symbolic
|
||||
name (e.g. `MAIN.bStart`, `GVL.Counter`, `Motor1.Status.Running`) rather than by
|
||||
raw memory offset. One driver instance fans out to N targets, each identified by
|
||||
an AMS Net ID + port.
|
||||
|
||||
## Project split
|
||||
|
||||
| Project | Target | Role |
|
||||
|---------|--------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/` | net10.0 | In-process driver — hosts the ADS client, symbol-path parser, and per-device probe loops |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Contracts/` | net10.0 | Config records + the `TwinCATDataType` enum bound from `DriverConfig` JSON |
|
||||
|
||||
## Minimum deployment
|
||||
|
||||
```jsonc
|
||||
"Drivers": {
|
||||
"twincat-cell-1": {
|
||||
"Type": "TwinCAT",
|
||||
"Config": {
|
||||
"Devices": [ { "HostAddress": "ads://5.23.91.23.1.1:851", "DeviceName": "Cell1" } ],
|
||||
"Tags": [
|
||||
{ "Name": "Start", "DeviceHostAddress": "ads://5.23.91.23.1.1:851",
|
||||
"SymbolPath": "MAIN.bStart", "DataType": "Bool", "Writable": true },
|
||||
{ "Name": "Count", "DeviceHostAddress": "ads://5.23.91.23.1.1:851",
|
||||
"SymbolPath": "GVL.Counter", "DataType": "Int32", "Writable": false }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### AMS address form
|
||||
|
||||
`HostAddress` is an `ads://{netId}:{port}` URI parsed by
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATAmsAddress.cs`. The Net ID
|
||||
is six dot-separated octets (NOT an IP — a Beckhoff-specific identifier the
|
||||
router maps to a route); the port is the AMS service port (851 = TC3 PLC runtime
|
||||
1, 852 = runtime 2, 801 / 811 / 821 = TC2 PLC runtimes). Port defaults to 851
|
||||
when omitted (`ads://5.23.91.23.1.1`).
|
||||
|
||||
### Symbol path form
|
||||
|
||||
Symbol paths are parsed by
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATSymbolPath.cs`, which
|
||||
mirrors IEC 61131-3 structured-text identifiers: global-variable-list
|
||||
(`GVL.Counter`), program variable (`MAIN.bStart`), struct member access
|
||||
(`Motor1.Status.Running`), array subscripts (`Data[5]`, `Matrix[1,2]`), and
|
||||
bit-access (`Flags.0`).
|
||||
|
||||
## Tag discovery
|
||||
|
||||
`DiscoverAsync` always emits the pre-declared `Tags` as the authoritative config
|
||||
path, under `TwinCAT/{device}/`. When `EnableControllerBrowse` is set, the
|
||||
driver also walks each device's symbol table and surfaces controller-resident
|
||||
globals / program locals under a `Discovered/` sub-folder; any symbol-loader
|
||||
error falls back to pre-declared-only so a flaky symbol download never blocks
|
||||
discovery.
|
||||
|
||||
## Capability surface
|
||||
|
||||
`TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IRediscoverable`
|
||||
(`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriver.cs`).
|
||||
|
||||
| Capability | Path | Notes |
|
||||
|------------|------|-------|
|
||||
| `IReadable` | `ReadAsync` → ADS `ReadValueAsync` | Per-device client, lazily connected and serialized per device |
|
||||
| `IWritable` | `WriteAsync` → ADS `WriteValueAsync` | Read-only tags return `BadNotWritable` |
|
||||
| `ITagDiscovery` | `DiscoverAsync` | Pre-declared tags + opt-in controller symbol browse |
|
||||
| `ISubscribable` | native ADS notifications (default), poll fallback | `UseNativeNotifications=true` registers device notifications so the PLC pushes changes; `false` uses the shared `PollGroupEngine` |
|
||||
| `IHostConnectivityProbe` | per-device probe loop | One `HostConnectivityStatus` per configured device; `Running`/`Stopped` transitions raise `OnHostStatusChanged` |
|
||||
| `IPerCallHostResolver` | `ResolveHost` lookup in the tag map | Routes each call to the device of the referenced tag; returns an empty-string sentinel when unresolved |
|
||||
| `IRediscoverable` | symbol-version-changed callback | A PLC re-download fires `OnRediscoveryNeeded` so the address space is rebuilt |
|
||||
|
||||
### Rediscovery on PLC re-download
|
||||
|
||||
`IRediscoverable` is the distinguishing capability. When the ADS client detects
|
||||
`DeviceSymbolVersionInvalid` (1809 / 0x0711) — the documented TwinCAT
|
||||
symbol-version-changed signal, raised when a PLC program is re-downloaded —
|
||||
every symbol and notification handle is invalidated. The driver raises
|
||||
`OnRediscoveryNeeded` with a `TwinCAT` scope hint so Core rebuilds the address
|
||||
space rather than treating it as a transient connection error.
|
||||
|
||||
### Native notifications
|
||||
|
||||
By default the driver registers native ADS device notifications: the PLC pushes
|
||||
value changes on its own cycle, which is strictly better for latency and CPU load
|
||||
than polling. `NotificationMaxDelayMs` lets TwinCAT coalesce notifications up to
|
||||
a batching delay for high-churn signals. Set `UseNativeNotifications=false` for
|
||||
deployments where the AMS router has notification limits you can't raise — then
|
||||
the driver falls back to the shared poll engine.
|
||||
|
||||
## Single-connection-per-device
|
||||
|
||||
Each device's ADS client is lazily connected and serialized by a per-device
|
||||
connect gate, so a concurrent read / write / probe can't race a client
|
||||
create-or-dispose. Probe-initiated connects use the probe timeout; reads and
|
||||
writes use the driver-wide `Timeout`.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit tests** — `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Tests/` cover
|
||||
the AMS / symbol-path parsers, the status mapper, and the driver lifecycle via
|
||||
a fake ADS client factory.
|
||||
- **Integration fixture** — see
|
||||
[TwinCAT-Test-Fixture.md](TwinCAT-Test-Fixture.md) for the harness map.
|
||||
- **CLI** — [Driver.TwinCAT.Cli.md](../Driver.TwinCAT.Cli.md) documents the
|
||||
standalone read/write/browse/probe CLI for manual checks.
|
||||
|
||||
## Further reading
|
||||
|
||||
- [`docs/v2/driver-specs.md §6`](../v2/driver-specs.md) — full per-field spec and
|
||||
DriverConfig JSON shape
|
||||
- [Driver.TwinCAT.Cli.md](../Driver.TwinCAT.Cli.md) — standalone TwinCAT driver CLI
|
||||
- [TwinCAT-Test-Fixture.md](TwinCAT-Test-Fixture.md) — test-harness map
|
||||
@@ -0,0 +1,146 @@
|
||||
# Documentation Audit — Design
|
||||
|
||||
**Date:** 2026-06-03
|
||||
**Status:** Approved (brainstorming complete) → ready for writing-plans
|
||||
**Branch:** `docs/documentation-audit` (off `master` @ `c6d9b20`)
|
||||
|
||||
## Goal
|
||||
|
||||
Perform an in-depth audit of the **live reference documentation** to ensure
|
||||
accuracy and completeness, correcting issues in place and writing
|
||||
documentation for every shipped-but-undocumented feature.
|
||||
|
||||
## Decisions
|
||||
|
||||
These were settled during brainstorming and are not open for re-litigation in
|
||||
the plan:
|
||||
|
||||
| Dimension | Decision |
|
||||
|---|---|
|
||||
| **Corpus** | Live reference docs only — top-level `docs/*.md` current-reference set, `docs/drivers/*.md`, `README.md`, `CLAUDE.md` (32 files). Excludes `docs/v1`, `docs/v2`, `docs/plans`, `docs/reqs`, `docs/v3`, `looseends.md`. |
|
||||
| **Output mode** | Fix in place, single pass → corrected docs + a change summary (delivered in chat, not committed). |
|
||||
| **Checks** | All four dimensions: structural integrity, stale-status reconciliation, code-reality cross-check, completeness gaps. |
|
||||
| **Gap handling** | Fill **every** gap — write documentation for all undocumented shipped features, small or large. |
|
||||
| **Approach** | C — deterministic baseline → code-first inventory → grouped vertical passes. |
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Historical tiers (`v1/`, `v2/`, `plans/`, `reqs/`, `v3/`, `looseends.md`) — they
|
||||
are point-in-time records and are not edited.
|
||||
- The XML doc-comment pass (handled separately by the `/fixdocs` run on branch
|
||||
`chore/fixdocs-xml-doc-comments`).
|
||||
- Code changes. This is a documentation effort. If the audit finds a genuine
|
||||
**code** bug, it is *flagged in the summary, not fixed*.
|
||||
- Secrets must never be introduced into docs: `sql_login.txt`, `pki/`, and the
|
||||
dev gateway API key stay out of any committed file.
|
||||
|
||||
## Corpus & subsystem grouping
|
||||
|
||||
Phase 1 runs one full-depth pass per group (G1–G4). G5 is the Phase-2
|
||||
reconciliation group.
|
||||
|
||||
| Group | Files |
|
||||
|---|---|
|
||||
| **G1 — Server core & data path** | `OpcUaServer.md`, `AddressSpace.md`, `ReadWriteOperations.md`, `IncrementalSync.md`, `VirtualTags.md`, `ScriptedAlarms.md`, `AlarmTracking.md` |
|
||||
| **G2 — Drivers** | `docs/drivers/`: `README.md`, `Galaxy.md`, `FOCAS.md`, + 7 `*-Test-Fixture.md` (`AbLegacy`, `AbServer`, `FOCAS`, `Modbus`, `OpcUaClient`, `S7`, `TwinCAT`) |
|
||||
| **G3 — Security & operational** | `security.md`, `Redundancy.md`, `Reservations.md`, `ServiceHosting.md`, `StatusDashboard.md` |
|
||||
| **G4 — Client & CLI tooling** | `Client.CLI.md`, `Client.UI.md`, `DriverClis.md`, `Driver.{Modbus,AbCip,AbLegacy,S7,TwinCAT,FOCAS}.Cli.md` |
|
||||
| **G5 — Index & root (reconcile last)** | `docs/README.md`, `CLAUDE.md` |
|
||||
|
||||
**Already-suspected findings** (the design accounts for them; verify during the pass):
|
||||
|
||||
- Top-level `AlarmTracking.md` may be **orphaned** — the README index links to
|
||||
`v1/AlarmTracking.md`, not the top-level file. Resolve in G1.
|
||||
- `StatusDashboard.md` is a **stub pointer** (superseded by `v2/admin-ui.md`).
|
||||
Resolve in G3.
|
||||
- `CLAUDE.md` references both `docs/security.md` and `docs/Security.md` — a
|
||||
**case mismatch** that works on macOS but breaks on the Linux Docker host.
|
||||
Resolve in G5.
|
||||
|
||||
## Phase 0 — deterministic baseline + code-first inventory
|
||||
|
||||
Two transient working artifacts produced **before any doc is edited**, kept
|
||||
under a scratch dir and **not committed** (lesson from the fixdocs run, where
|
||||
`OtOpcUa-docs-*.md` cluttered the repo root):
|
||||
|
||||
**(a) Structural checker.** Walks all 32 docs, extracts every markdown link and
|
||||
inline source path (`src/...`, `docs/...`, `scripts/...`, `tests/...`), and
|
||||
resolves each against the filesystem. Output: broken links / dead paths / case
|
||||
mismatches. Deterministic and re-runnable — it is also the Phase-2 exit gate.
|
||||
|
||||
**(b) Feature inventory from source.** Enumerated from code, *not* docs, so
|
||||
"fill every gap" has ground truth:
|
||||
|
||||
- **Drivers** — the driver projects under `src/Drivers/` (+ the
|
||||
`Historian.Wonderware` sidecar).
|
||||
- **Capabilities** — the `Core.Abstractions` interfaces (`IReadable`,
|
||||
`IWritable`, `ITagDiscovery`, `ISubscribable`, `IAlarmSource`,
|
||||
`IHistoryProvider`, `IHostConnectivityProbe`, `IPerCallHostResolver`).
|
||||
- **Config surface** — `appsettings.json` sections + bound Options classes
|
||||
(Security, Authentication.Ldap, Redundancy, MxAccess, …) and documented env
|
||||
vars (`OTOPCUA_ROLES`, …).
|
||||
- **CLI surface** — command verbs + flags from the `System.CommandLine`
|
||||
definitions in the client + 6 driver CLIs.
|
||||
- **Security profiles** — the values `SecurityProfileResolver` actually
|
||||
resolves.
|
||||
|
||||
Diffing the inventory against the docs yields the completeness worklist (what
|
||||
ships but is not documented) and grounds the code-reality cross-check.
|
||||
|
||||
## Phase 1 — per-group fix methodology
|
||||
|
||||
Each group is a vertical pass. For every doc in the group, all four dimensions
|
||||
are applied in order, then the group is committed together:
|
||||
|
||||
1. **Structural** — apply the doc's Phase-0 link/path findings: repair broken
|
||||
links, repoint moved `src/...` paths to current locations, fix case
|
||||
mismatches, resolve orphans (re-link, merge, or retire), replace stub
|
||||
pointers with real content or a correct pointer.
|
||||
2. **Stale-status** — locate state words / banners (`blocked`, `pending`,
|
||||
`not yet`, `planned`, `TODO`, `as of <date>`) and reconcile each against
|
||||
current reality (source + git history + known facts: v2 feature-complete,
|
||||
native alarms verified working). Rewrite to present-tense truth or delete if
|
||||
obsolete.
|
||||
3. **Code-reality cross-check** — verify every technical claim (namespace,
|
||||
class, file, `appsettings` key, env var, CLI verb/flag, described behavior)
|
||||
against the Phase-0 inventory and a direct source read. **Fixes go to the
|
||||
doc to match the code, never the reverse.** A genuine code bug is flagged in
|
||||
the summary, not changed.
|
||||
4. **Completeness** — take this group's slice of the inventory diff and write
|
||||
the missing docs: small inline additions for a missing key/flag, new
|
||||
sections or whole new pages for an undocumented driver/subsystem. Every new
|
||||
page is linked from its index (`README.md` / `drivers/README.md`).
|
||||
|
||||
**Hard scope rule:** edits land only in the 32 in-scope files. If an in-scope
|
||||
doc links into an out-of-scope tier and the *target moved*, fix the **link in
|
||||
the live doc** — never edit the historical artifact.
|
||||
|
||||
## Phase 2 — reconciliation & validation
|
||||
|
||||
**Cross-doc reconciliation (G5):** `docs/README.md` index integrity (every
|
||||
listed doc exists and is correctly described; newly written docs are added),
|
||||
"superseded by" pointers correct, and `CLAUDE.md` reconciled against reality
|
||||
(the `security.md`/`Security.md` casing, retired-project notes, the docs it
|
||||
names as canonical).
|
||||
|
||||
**Validation — the audit's "tests" are two re-runnable gates plus review:**
|
||||
|
||||
- **Structural gate** — re-run the Phase-0 checker → **zero** broken links /
|
||||
dead paths / case mismatches.
|
||||
- **Completeness gate** — re-run the inventory diff → every shipped feature is
|
||||
documented, or each deliberate exclusion is listed with a reason.
|
||||
- **Spot-verification** — a sample of code-reality fixes re-checked against
|
||||
source with `file:line` citations in the summary.
|
||||
- Each group is a reviewable commit; nothing touches code, secrets, or
|
||||
out-of-scope tiers.
|
||||
|
||||
## Output
|
||||
|
||||
The change summary (in chat, not committed): fixes grouped by dimension, the
|
||||
list of new docs written for completeness, and any code bugs flagged-not-fixed.
|
||||
|
||||
## Brainstorming task references
|
||||
|
||||
Native tasks created during brainstorming: #53 (explore), #54 (clarify), #55
|
||||
(approaches), #56 (present design), #57 (write design doc), #58 (transition to
|
||||
writing-plans).
|
||||
@@ -0,0 +1,329 @@
|
||||
# Documentation Audit Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Audit and fix the 32 live reference docs in place so they are accurate against today's source and complete (every shipped feature documented).
|
||||
|
||||
**Architecture:** Approach C — a deterministic Phase 0 baseline (a re-runnable link/path checker + a code-first feature inventory) feeds grouped vertical passes (G1 server-core, G2 drivers, G3 security/operational, G4 client+CLI), each applying all four audit dimensions per doc, then a Phase 2 reconciliation of the shared index/root docs plus a final corpus-wide gate.
|
||||
|
||||
**Tech Stack:** Markdown docs; a small Python 3 checker script; the OtOpcUa .NET 10 source tree as the ground truth for cross-checking.
|
||||
|
||||
**Design:** `docs/plans/2026-06-03-documentation-audit-design.md` (read it for the decisions; they are settled).
|
||||
|
||||
---
|
||||
|
||||
## Method note (read once)
|
||||
|
||||
This is a **documentation** deliverable — there is no xUnit suite to make red→green. The plan therefore adapts the TDD step shape: each task **identifies findings → applies fixes → verifies with the Phase-0 gate (scoped) → commits**. The executable verification is the structural checker (Task 1) plus per-task acceptance criteria. Do not invent unit tests for prose.
|
||||
|
||||
## Hard rules (apply to EVERY task)
|
||||
|
||||
1. **Scope:** edit ONLY the 32 in-scope files. The only sanctioned exceptions are the new pages that Procedure C creates for completeness and the one-line `.gitignore` change in Task 1. Never edit out-of-scope tiers (`docs/v1`, `docs/v2`, `docs/plans` except this plan/design, `docs/reqs`, `docs/v3`, `looseends.md`). If an in-scope doc links into an out-of-scope tier and the **target moved**, fix the **link in the live doc** — never the historical artifact.
|
||||
2. **Direction:** docs change to match the code, **never** the reverse. If the code itself looks wrong, append a one-line entry to `.docs-audit/code-bug-flags.md` — do NOT change code.
|
||||
3. **Evidence:** every code-reality correction must be verified against a real source location; record `file:line` in the commit body or `.docs-audit/notes.md`. No fixes from memory or assumption.
|
||||
4. **Git safety:** stage files **explicitly by path**. NEVER `git add .` / `git add -A`. Never stage `sql_login.txt`, `src/Server/ZB.MOM.WW.OtOpcUa.Host/pki/`, or the `.docs-audit/` scratch dir. Never echo the dev gateway API key into a tracked file. No force-push, no `--no-verify`.
|
||||
5. **Branch:** all work on `docs/documentation-audit` (already checked out).
|
||||
|
||||
## Shared procedures (referenced by tasks as "Procedure P / C / Gate")
|
||||
|
||||
### Gate — structural checker
|
||||
|
||||
```bash
|
||||
python3 .docs-audit/check_links.py > .docs-audit/links-report.md 2>.docs-audit/links-summary.txt; cat .docs-audit/links-summary.txt
|
||||
```
|
||||
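The checker itself exits 0 only when there are zero issues. Because the command line above ends with `cat`, the shell's final `$?` is `cat`'s status, not the checker's — judge the result by the printed `N issue(s) across M docs` summary line. The report is tab-separated: `file <TAB> kind <TAB> tag <TAB> raw-target <TAB> case-hint`.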
|
||||
|
||||
### Procedure P — per-doc audit (apply all four dimensions to one doc)
|
||||
|
||||
1. **Read** the whole doc.
|
||||
2. **Structural** — for each entry for this doc in `.docs-audit/links-report.md`: repair the broken link / repoint the dead `src|tests|scripts|docs/...` path to its verified current location / fix the case mismatch (use the `case-hint` column). Confirm every new target exists on disk.
|
||||
3. **Stale-status** — scan for state words (`blocked`, `pending`, `not yet`, `planned`, `TODO`, `TBD`, `as of <date>`, `will`, `coming`). For each, verify against source + `git log` + known facts (v2 feature-complete; native alarms verified working 2026-05-31). Rewrite to present-tense truth or delete if obsolete.
|
||||
4. **Code-reality cross-check** — for every technical claim (namespace, class, file, `appsettings` key, env var, CLI verb/flag, described behavior), open the cited source and verify. Fix the doc to match; record `file:line` evidence. Flag genuine code bugs to `.docs-audit/code-bug-flags.md`.
|
||||
5. **Inline completeness** — from this doc's slice of `.docs-audit/inventory-diff.md`, add small missing items that belong in an existing section (a missing config key, an undocumented flag, a one-paragraph gap). Whole-new-page gaps are deferred to the group completeness task (Procedure C).
|
||||
6. **Verify** — run the Gate; confirm zero issues attributable to this doc; eyeball that tables/code-fences/lists still render.
|
||||
7. **Commit** this one doc by explicit path: `git add <doc> && git commit -m "docs(audit): <doc> — accuracy + completeness pass"`.
|
||||
|
||||
### Procedure C — per-group completeness & cross-links
|
||||
|
||||
1. Take this group's domain slice of `.docs-audit/inventory-diff.md` (features with **no** doc coverage at all).
|
||||
2. For each, write the documentation: a new page under the appropriate dir, or a new section in the most relevant existing in-scope doc (judgment — prefer extending an existing doc over a thin new page).
|
||||
3. **Group-local index only:** G2 may update `docs/drivers/README.md`. Do **not** touch `docs/README.md` (top-level index) here — append each new top-level page to `.docs-audit/new-pages.md` for Task 26 (G5) to link in one place, avoiding cross-group collisions on the shared index.
|
||||
4. Run the Gate; commit new/edited files by explicit path.
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 — deterministic baseline + code-first inventory
|
||||
|
||||
### Task 1: Structural checker script + initial run
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 2
|
||||
|
||||
**Files:**
|
||||
- Create: `.docs-audit/check_links.py` (untracked scratch — never committed)
|
||||
- Create (untracked): `.docs-audit/links-report.md`, `.docs-audit/links-summary.txt`
|
||||
|
||||
**Step 1: Ensure scratch dir is ignored.** If `.docs-audit/` is not already covered by `.gitignore`, add the line `.docs-audit/` to `.gitignore` and commit that one-line change (`git add .gitignore && git commit -m "chore: ignore .docs-audit scratch dir"`). This is the only non-doc file the plan commits.
|
||||
|
||||
**Step 2: Write `.docs-audit/check_links.py`:**
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
"""Structural link/path checker for the documentation audit (Phase 0 + final gate).
|
||||
Scans the 32 in-scope live-reference docs, resolves every markdown link and inline
|
||||
src|tests|scripts|docs path against the filesystem, and reports MISSING / CASE-MISMATCH."""
|
||||
import os, re, sys, glob
|
||||
|
||||
REPO = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
def in_scope():
|
||||
files = sorted(glob.glob(os.path.join(REPO, "docs", "*.md")))
|
||||
files += sorted(glob.glob(os.path.join(REPO, "docs", "drivers", "*.md")))
|
||||
files += [os.path.join(REPO, "README.md"), os.path.join(REPO, "CLAUDE.md")]
|
||||
return [f for f in files if os.path.isfile(f)]
|
||||
|
||||
LINK_RE = re.compile(r"\[[^\]]*\]\(([^)]+)\)")
|
||||
PATH_RE = re.compile(r"`?((?:src|tests|scripts|docs)/[A-Za-z0-9_./-]+)`?")
|
||||
|
||||
def case_insensitive_hint(path):
|
||||
d, name = os.path.split(path)
|
||||
if not os.path.isdir(d):
|
||||
return None
|
||||
for entry in os.listdir(d):
|
||||
if entry.lower() == name.lower():
|
||||
return os.path.join(d, entry)
|
||||
return None
|
||||
|
||||
def check(f):
|
||||
base = os.path.dirname(f)
|
||||
text = open(f, encoding="utf-8").read()
|
||||
out = []
|
||||
targets = [("link", m.group(1)) for m in LINK_RE.finditer(text)]
|
||||
targets += [("path", m.group(1)) for m in PATH_RE.finditer(text)]
|
||||
for kind, raw in targets:
|
||||
t = raw.split("#")[0].strip()
|
||||
if not t or re.match(r"^[a-z]+://", t) or t.startswith("mailto:"):
|
||||
continue
|
||||
if kind == "link":
|
||||
cand = os.path.normpath(os.path.join(base, t))
|
||||
else:
|
||||
cand = os.path.normpath(os.path.join(REPO, t.rstrip("./")))
|
||||
if os.path.exists(cand):
|
||||
continue
|
||||
hint = case_insensitive_hint(cand)
|
||||
tag = "CASE-MISMATCH" if hint else "MISSING"
|
||||
out.append((os.path.relpath(f, REPO), kind, tag, raw,
|
||||
os.path.relpath(hint, REPO) if hint else ""))
|
||||
return out
|
||||
|
||||
def main():
|
||||
docs = in_scope()
|
||||
issues = [row for f in docs for row in check(f)]
|
||||
for rel, kind, tag, raw, hint in issues:
|
||||
print(f"{rel}\t{kind}\t{tag}\t{raw}\t{hint}")
|
||||
print(f"{len(issues)} issue(s) across {len(docs)} docs", file=sys.stderr)
|
||||
sys.exit(1 if issues else 0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
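**Step 3: Run it** (Gate). Expected on first run: a non-empty report (at minimum the `CLAUDE.md` → `docs/Security.md` case mismatch and the `AlarmTracking.md` orphan situation surface here). Note that the script only looks for a case hint after `os.path.exists` fails, so on a case-insensitive filesystem (the default on Windows and macOS) wrong-case paths resolve successfully and no `CASE-MISMATCH` rows are reported — run the checker on a case-sensitive checkout, or verify casing by hand. Confirm the script runs without a Python traceback and the count printed to stderr matches the report line count.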
|
||||
|
||||
**Step 4:** Do NOT commit the script or reports (they are under the now-ignored `.docs-audit/`). Only the `.gitignore` line from Step 1 is committed.
|
||||
|
||||
**Acceptance:** `check_links.py` runs clean (no traceback), emits a tab-separated report, exits non-zero while issues remain. This same command is the per-task and final gate.
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Code-first feature inventory + coverage diff
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min (broad enumeration — split into sub-runs if needed)
|
||||
**Parallelizable with:** Task 1
|
||||
|
||||
**Files:**
|
||||
- Create (untracked): `.docs-audit/inventory.md`, `.docs-audit/inventory-diff.md`
|
||||
|
||||
**Step 1: Enumerate the shipped surface from source** into `.docs-audit/inventory.md`, grouped by domain so Procedure C can slice it:
|
||||
- **Drivers (G2 domain)** — every family under `src/Drivers/` (`AbCip`, `AbLegacy`, `FOCAS`, `Galaxy`, `Historian.Wonderware`, `Modbus`, `OpcUaClient`, `S7`, `TwinCAT`). For each, note the driver class + which capability interfaces it implements.
|
||||
- **Capabilities (G1 domain)** — the interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/` (`IReadable`, `IWritable`, `ITagDiscovery`, `ISubscribable`, `IAlarmSource`, `IHistoryProvider`, `IHostConnectivityProbe`, `IPerCallHostResolver`, plus `IDriver*`, `IAddressSpaceBuilder`, `IRediscoverable`).
|
||||
- **Config surface (G3 domain)** — top-level sections across `src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings*.json` and their bound Options classes (e.g. `Security`, `Authentication.Ldap`, `Redundancy`, `MxAccess`). List documented env vars (`OTOPCUA_ROLES`, …).
|
||||
- **Security profiles (G3 domain)** — the exact profile strings `SecurityProfileResolver` resolves (grep `src/Server/ZB.MOM.WW.OtOpcUa.Security/`).
|
||||
- **CLI surface (G4 domain)** — command verbs + options from the `System.CommandLine` definitions in `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI/` and each driver CLI under `src/Drivers/Cli/`.
|
||||
|
||||
**Step 2: Compute the coverage diff** into `.docs-audit/inventory-diff.md`. For each inventory item, grep the 32 in-scope docs for its primary token; mark `COVERED` / `PARTIAL` / `MISSING`. Helper:
|
||||
```bash
|
||||
grep -RIl --include='*.md' "<token>" docs/*.md docs/drivers/*.md README.md CLAUDE.md
|
||||
```
|
||||
Keep only `PARTIAL`/`MISSING` rows in the diff, tagged with the owning domain (G1–G4). This is the completeness worklist consumed by Procedure P step 5 (small/partial) and Procedure C (missing whole pages).
|
||||
|
||||
**Step 3:** No commit (scratch only).
|
||||
|
||||
**Acceptance:** `inventory.md` lists every shipped driver/capability/config-section/security-profile/CLI-verb with a source location; `inventory-diff.md` enumerates the gaps tagged by domain. A spot-check of 3 random inventory rows resolves to real source.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — grouped vertical passes
|
||||
|
||||
> All Phase 1 tasks are **blockedBy Task 1 and Task 2**. Every per-doc accuracy task edits only its own doc(s) → all are mutually parallelizable (disjoint files). Each group's completeness task (Procedure C) is blockedBy that group's accuracy tasks.
|
||||
|
||||
### G1 — Server core & data path
|
||||
|
||||
### Task 3: OpcUaServer.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** all other Phase-1 accuracy tasks (Tasks 4–7, 9–13, 15–18, 20–24)
|
||||
**Files:** Modify `docs/OpcUaServer.md`
|
||||
Apply **Procedure P**. Doc-specific focus: Core/driver-dispatch/Config-DB/generations claims vs `src/Core` + `src/Server`; verify `CapabilityInvoker`, `GenericDriverNodeManager`, generation-diff references resolve.
|
||||
|
||||
### Task 4: AddressSpace.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3, 5–7, 9–13, 15–18, 20–24
|
||||
**Files:** Modify `docs/AddressSpace.md`
|
||||
Apply **Procedure P**. Focus: `GenericDriverNodeManager`, `ITagDiscovery`, `IAddressSpaceBuilder`, `DataTypeMap.cs` path.
|
||||
|
||||
### Task 5: ReadWriteOperations.md + IncrementalSync.md
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3,4,6,7,9–13,15–18,20–24
|
||||
**Files:** Modify `docs/ReadWriteOperations.md`, `docs/IncrementalSync.md`
|
||||
Apply **Procedure P** to each. Focus: `CapabilityInvoker`→`IReadable`/`IWritable`; `sp_ComputeGenerationDiff` + rebuild-on-redeploy.
|
||||
|
||||
### Task 6: VirtualTags.md + ScriptedAlarms.md
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–5,7,9–13,15–18,20–24
|
||||
**Files:** Modify `docs/VirtualTags.md`, `docs/ScriptedAlarms.md`
|
||||
Apply **Procedure P** to each. Focus: `Core.Scripting`/`Core.VirtualTags`/`Core.ScriptedAlarms` (Roslyn sandbox, Part 9 state machine). Cross-check against the named Core projects.
|
||||
|
||||
### Task 7: AlarmTracking.md (orphan resolution)
|
||||
**Classification:** small · **~4 min** · **Parallelizable with:** Tasks 3–6,9–13,15–18,20–24
|
||||
**Files:** Modify `docs/AlarmTracking.md` (and/or decide retirement)
|
||||
**Known finding:** the README index links to `docs/v1/AlarmTracking.md`, not this top-level file → it is likely orphaned. Apply **Procedure P**, then **decide**: (a) if it duplicates the v1 archive, replace its body with a short current-state pointer to the live alarm story (native alarms work end-to-end) + the v1 archive link; or (b) if it carries unique current content, keep & fix it and ensure Task 26 links it from `docs/README.md`. Record the decision in the commit body. Do not delete the file without noting why.
|
||||
|
||||
### Task 8: G1 completeness & cross-links
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** other groups' completeness tasks (14, 19, 25)
|
||||
**blockedBy:** Tasks 3,4,5,6,7
|
||||
**Files:** Create/Modify server-core docs as needed; append new top-level pages to `.docs-audit/new-pages.md`
|
||||
Apply **Procedure C** for the **G1 (capabilities/server-core)** slice of `inventory-diff.md`. Likely candidates: any capability interface or Core subsystem (e.g. `Core.AlarmHistorian`) with no live-doc home.
|
||||
|
||||
### G2 — Drivers
|
||||
|
||||
### Task 9: docs/drivers/README.md (index + capability matrix)
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,10–13,15–18,20–24
|
||||
**Files:** Modify `docs/drivers/README.md`
|
||||
Apply **Procedure P**. Focus: the eight-driver count + capability matrix vs the actual `src/Drivers/` families and the interfaces each implements (from `inventory.md`). Correct the matrix to match reality.
|
||||
|
||||
### Task 10: docs/drivers/Galaxy.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,9,11–13,15–18,20–24
|
||||
**Files:** Modify `docs/drivers/Galaxy.md`
|
||||
Apply **Procedure P**. Focus: in-process gRPC client → mxaccessgw sidecar; `GalaxyDriver`, `IGalaxyHierarchySource`, `DeployWatcher`, contained-name↔tag-name translation vs `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/`.
|
||||
|
||||
### Task 11: drivers/FOCAS.md + FOCAS-Test-Fixture.md
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9,10,12,13,15–18,20–24
|
||||
**Files:** Modify `docs/drivers/FOCAS.md`, `docs/drivers/FOCAS-Test-Fixture.md`
|
||||
Apply **Procedure P** to each vs `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS`.
|
||||
|
||||
### Task 12: Modbus + AbServer + AbLegacy test-fixture docs
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–11,13,15–18,20–24
|
||||
**Files:** Modify `docs/drivers/Modbus-Test-Fixture.md`, `docs/drivers/AbServer-Test-Fixture.md`, `docs/drivers/AbLegacy-Test-Fixture.md`
|
||||
Apply **Procedure P** to each. Focus: docker-host endpoints (`10.100.0.35`), fixture compose paths, `lmxopcua` labels vs `tests/.../Docker/` + CLAUDE.md Docker section.
|
||||
|
||||
### Task 13: S7 + TwinCAT + OpcUaClient test-fixture docs
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–12,15–18,20–24
|
||||
**Files:** Modify `docs/drivers/S7-Test-Fixture.md`, `docs/drivers/TwinCAT-Test-Fixture.md`, `docs/drivers/OpcUaClient-Test-Fixture.md`
|
||||
Apply **Procedure P** to each (same fixture/endpoint focus as Task 12).
|
||||
|
||||
### Task 14: G2 completeness & drivers index
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 8,19,25
|
||||
**blockedBy:** Tasks 9,10,11,12,13
|
||||
**Files:** Create new `docs/drivers/*.md` as needed; Modify `docs/drivers/README.md` (group-local index)
|
||||
Apply **Procedure C** for the **G2 (drivers)** slice. Likely candidates: any `src/Drivers/` family lacking a dedicated doc (e.g. AbCip/AbLegacy/S7/TwinCAT/Modbus/OpcUaClient have CLI docs + fixtures but may lack a driver-overview page like Galaxy/FOCAS). Link any new page from `docs/drivers/README.md`. Top-level links → `.docs-audit/new-pages.md`.
|
||||
|
||||
### G3 — Security & operational
|
||||
|
||||
### Task 15: security.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,16–18,20–24
|
||||
**Files:** Modify `docs/security.md`
|
||||
Apply **Procedure P**. Focus: transport-security profile strings (vs `SecurityProfileResolver`), LDAP auth + group→role mapping, ACL trie, role grants, the OTOPCUA0001 analyzer. This is the highest-value accuracy doc — verify every profile/role/config-key against source.
|
||||
|
||||
### Task 16: Redundancy.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15,17,18,20–24
|
||||
**Files:** Modify `docs/Redundancy.md`
|
||||
Apply **Procedure P**. Focus: `RedundancyCoordinator`, `ServiceLevelCalculator`, apply-lease, `RedundancySupport`/`ServerUriArray`/`ServiceLevel`, Prometheus metrics vs `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane`/`Runtime`.
|
||||
|
||||
### Task 17: ServiceHosting.md
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15,16,18,20–24
|
||||
**Files:** Modify `docs/ServiceHosting.md`
|
||||
Apply **Procedure P**. Focus: single fused `OtOpcUa.Host` binary, `OTOPCUA_ROLES` gating (`admin`/`driver`/both), `AddWindowsService`, the optional Wonderware Historian sidecar vs `src/Server/ZB.MOM.WW.OtOpcUa.Host`.
|
||||
|
||||
### Task 18: Reservations.md + StatusDashboard.md (stub resolution)
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15–17,20–24
|
||||
**Files:** Modify `docs/Reservations.md`, `docs/StatusDashboard.md`
|
||||
Apply **Procedure P** to `Reservations.md` (ZTag/SAPID external-ID reservations, publish-time claim/release). **StatusDashboard.md is a known stub pointer** (superseded by `v2/admin-ui.md`, which is out of scope): verify the pointer target still exists and the supersession statement is accurate; keep it a clean pointer (do not expand). If `v2/admin-ui.md` moved, fix the link only.
|
||||
|
||||
### Task 19: G3 completeness & cross-links
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 8,14,25
|
||||
**blockedBy:** Tasks 15,16,17,18
|
||||
**Files:** Create/Modify security/operational docs as needed; append top-level pages to `.docs-audit/new-pages.md`
|
||||
Apply **Procedure C** for the **G3 (config/security/operational)** slice — any `appsettings` section, security profile, or operational subsystem with no live-doc coverage.
|
||||
|
||||
### G4 — Client & CLI tooling
|
||||
|
||||
### Task 20: Client.CLI.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15–18,21–24
|
||||
**Files:** Modify `docs/Client.CLI.md`
|
||||
Apply **Procedure P**. Focus: `otopcua-cli` verbs/flags (connect/read/write/browse/subscribe/historyread/alarms/redundancy) vs the `System.CommandLine` defs in `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI/`. Every documented command/flag must exist; every shipped command must be documented.
|
||||
|
||||
### Task 21: Client.UI.md
|
||||
**Classification:** small · **~4 min** · **Parallelizable with:** Tasks 3–7,9–13,15–18,20,22–24
|
||||
**Files:** Modify `docs/Client.UI.md`
|
||||
Apply **Procedure P** vs `src/Client/ZB.MOM.WW.OtOpcUa.Client.UI` (Avalonia desktop client).
|
||||
|
||||
### Task 22: DriverClis.md (index + shared commands)
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15–18,20,21,23,24
|
||||
**Files:** Modify `docs/DriverClis.md`
|
||||
Apply **Procedure P**. Focus: the index must list exactly the driver CLIs that ship under `src/Drivers/Cli/`; shared command set matches the common base.
|
||||
|
||||
### Task 23: Driver.Modbus/AbCip/AbLegacy CLI docs
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15–18,20–22,24
|
||||
**Files:** Modify `docs/Driver.Modbus.Cli.md`, `docs/Driver.AbCip.Cli.md`, `docs/Driver.AbLegacy.Cli.md`
|
||||
Apply **Procedure P** to each vs the matching CLI project under `src/Drivers/Cli/`. Verify verbs/flags + the documented device families.
|
||||
|
||||
### Task 24: Driver.S7/TwinCAT/FOCAS CLI docs
|
||||
**Classification:** small · **~5 min** · **Parallelizable with:** Tasks 3–7,9–13,15–18,20–23
|
||||
**Files:** Modify `docs/Driver.S7.Cli.md`, `docs/Driver.TwinCAT.Cli.md`, `docs/Driver.FOCAS.Cli.md`
|
||||
Apply **Procedure P** to each vs the matching CLI project under `src/Drivers/Cli/`.
|
||||
|
||||
### Task 25: G4 completeness & cross-links
|
||||
**Classification:** standard · **~4 min** · **Parallelizable with:** Tasks 8,14,19
|
||||
**blockedBy:** Tasks 20,21,22,23,24
|
||||
**Files:** Create/Modify client/CLI docs as needed; append top-level pages to `.docs-audit/new-pages.md`
|
||||
Apply **Procedure C** for the **G4 (client/CLI)** slice — any CLI verb or client surface with no doc coverage.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — reconciliation & final gate
|
||||
|
||||
### Task 26: G5 reconciliation — README index + CLAUDE.md
|
||||
**Classification:** standard · **~5 min** · **Parallelizable with:** none
|
||||
**blockedBy:** Tasks 8,14,19,25
|
||||
**Files:** Modify `docs/README.md`, `CLAUDE.md`
|
||||
1. **README index integrity:** every doc listed in `docs/README.md` exists and is described correctly; every new page recorded in `.docs-audit/new-pages.md` is added to the right table; resolve the `AlarmTracking.md` link per Task 7's decision; verify all "superseded by" pointers.
|
||||
2. **CLAUDE.md reconciliation:** fix the `docs/security.md` vs `docs/Security.md` **case mismatch** (canonical filename is lowercase `security.md`); verify the docs CLAUDE.md names as canonical exist; reconcile any retired-project / status notes against current reality.
|
||||
3. Run the **Gate**; commit both files by explicit path.
|
||||
|
||||
**Acceptance:** Gate attributes zero issues to `docs/README.md`/`CLAUDE.md`; both `security.md` references use the on-disk casing; every new page is linked.
|
||||
|
||||
### Task 27: Final gate + change summary
|
||||
**Classification:** small · **~4 min** · **Parallelizable with:** none
|
||||
**blockedBy:** Task 26
|
||||
**Files:** none committed (verification + reporting only)
|
||||
1. **Structural gate (corpus-wide):** run the Gate → exit 0, `0 issue(s)`. If any remain, they are unfixed findings — return to the owning doc's task, do not hand-wave.
|
||||
2. **Completeness gate:** re-run the Task-2 coverage diff → every inventory item is `COVERED`, or each remaining gap is listed in the summary with an explicit reason for exclusion (e.g. "out-of-scope tier owns it").
|
||||
3. **Assemble the change summary** (deliver in chat, do not commit): fixes grouped by dimension (structural / stale-status / code-reality / completeness), the list of new docs written, the contents of `.docs-audit/code-bug-flags.md` (code bugs flagged-not-fixed), and any deliberate completeness exclusions.
|
||||
|
||||
**Acceptance:** both gates green; change summary delivered.
|
||||
|
||||
---
|
||||
|
||||
## Execution order & parallelism summary
|
||||
|
||||
- **Phase 0:** Tasks 1 ∥ 2 (no deps).
|
||||
- **Phase 1:** after Phase 0, all accuracy tasks (3–7, 9–13, 15–18, 20–24) run in parallel — disjoint files. Each group's completeness task (8, 14, 19, 25) follows its group's accuracy tasks; the four completeness tasks are mutually parallel.
|
||||
- **Phase 2:** Task 26 after all completeness tasks; Task 27 after 26.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-03-documentation-audit.md",
|
||||
"designPath": "docs/plans/2026-06-03-documentation-audit-design.md",
|
||||
"branch": "docs/documentation-audit",
|
||||
"tasks": [
|
||||
{"id": 1, "nativeTaskId": 59, "subject": "Task 1: Structural checker script + initial run", "status": "pending", "blockedBy": []},
|
||||
{"id": 2, "nativeTaskId": 60, "subject": "Task 2: Code-first feature inventory + coverage diff", "status": "pending", "blockedBy": []},
|
||||
{"id": 3, "nativeTaskId": 61, "subject": "Task 3: OpcUaServer.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 4, "nativeTaskId": 62, "subject": "Task 4: AddressSpace.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 5, "nativeTaskId": 63, "subject": "Task 5: ReadWriteOperations.md + IncrementalSync.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 6, "nativeTaskId": 64, "subject": "Task 6: VirtualTags.md + ScriptedAlarms.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 7, "nativeTaskId": 65, "subject": "Task 7: AlarmTracking.md (orphan resolution)", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 8, "nativeTaskId": 66, "subject": "Task 8: G1 completeness & cross-links", "status": "pending", "blockedBy": [3, 4, 5, 6, 7]},
|
||||
{"id": 9, "nativeTaskId": 67, "subject": "Task 9: docs/drivers/README.md (index + capability matrix)", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 10, "nativeTaskId": 68, "subject": "Task 10: docs/drivers/Galaxy.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 11, "nativeTaskId": 69, "subject": "Task 11: FOCAS.md + FOCAS-Test-Fixture.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 12, "nativeTaskId": 70, "subject": "Task 12: Modbus + AbServer + AbLegacy test-fixture docs", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 13, "nativeTaskId": 71, "subject": "Task 13: S7 + TwinCAT + OpcUaClient test-fixture docs", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 14, "nativeTaskId": 72, "subject": "Task 14: G2 completeness & drivers index", "status": "pending", "blockedBy": [9, 10, 11, 12, 13]},
|
||||
{"id": 15, "nativeTaskId": 73, "subject": "Task 15: security.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 16, "nativeTaskId": 74, "subject": "Task 16: Redundancy.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 17, "nativeTaskId": 75, "subject": "Task 17: ServiceHosting.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 18, "nativeTaskId": 76, "subject": "Task 18: Reservations.md + StatusDashboard.md (stub)", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 19, "nativeTaskId": 77, "subject": "Task 19: G3 completeness & cross-links", "status": "pending", "blockedBy": [15, 16, 17, 18]},
|
||||
{"id": 20, "nativeTaskId": 78, "subject": "Task 20: Client.CLI.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 21, "nativeTaskId": 79, "subject": "Task 21: Client.UI.md", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 22, "nativeTaskId": 80, "subject": "Task 22: DriverClis.md (index + shared commands)", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 23, "nativeTaskId": 81, "subject": "Task 23: Driver.Modbus/AbCip/AbLegacy CLI docs", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 24, "nativeTaskId": 82, "subject": "Task 24: Driver.S7/TwinCAT/FOCAS CLI docs", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 25, "nativeTaskId": 83, "subject": "Task 25: G4 completeness & cross-links", "status": "pending", "blockedBy": [20, 21, 22, 23, 24]},
|
||||
{"id": 26, "nativeTaskId": 84, "subject": "Task 26: G5 reconciliation — README index + CLAUDE.md", "status": "pending", "blockedBy": [8, 14, 19, 25]},
|
||||
{"id": 27, "nativeTaskId": 85, "subject": "Task 27: Final gate + change summary", "status": "pending", "blockedBy": [26]}
|
||||
],
|
||||
"lastUpdated": "2026-06-03"
|
||||
}
|
||||
@@ -0,0 +1,201 @@
|
||||
# Scope: Equipment-Namespace Materialization in the Live Deploy Path
|
||||
|
||||
**Status:** Scoping (not yet a task plan)
|
||||
**Date:** 2026-06-06
|
||||
**Author:** investigation while building the Northwind UNS overlay (see `scadaproj/otopcua-uns-loader/`)
|
||||
**Depends on:** the driver value-streaming fixes already on `master` (`c1ce583`, `b1b3f3f`)
|
||||
|
||||
---
|
||||
|
||||
## 1. One-paragraph summary
|
||||
|
||||
OtOpcUa can build a **SystemPlatform** namespace (the Galaxy mirror) into the live OPC UA
|
||||
address space with streaming values, but it **cannot do the same for an `Equipment`-kind
|
||||
namespace**. The canonical UNS (`Enterprise/Site/Area/Line/Equipment/Signal`) that an Equipment
|
||||
namespace represents only ever materialises its **skeleton** (Area/Line/Equipment *folders*); the
|
||||
**signals under equipment** (`Tag`, `VirtualTag`, `ScriptedAlarm` rows) never appear, because the
|
||||
component that turns those rows into OPC UA variables — `EquipmentNodeWalker` — is **fully built
|
||||
and unit-tested but never invoked in production**, and the live rebuild path doesn't carry the
|
||||
data it needs. This document scopes the work to finish that pipeline.
|
||||
|
||||
---
|
||||
|
||||
## 2. What works vs. what doesn't (verified 2026-06-06)
|
||||
|
||||
**Works — SystemPlatform / Galaxy mirror (reference implementation):**
|
||||
A deploy materialises one folder per Galaxy object and one variable per `Tag` row, and the driver
|
||||
streams live values into them. Verified live: 396 tags across 40 machines, all `Good`, on
|
||||
`opc.tcp://localhost:4840`. Path:
|
||||
`OpcUaPublishActor.HandleRebuild` → `Phase7Applier.MaterialiseHierarchy` +
|
||||
`Phase7Applier.MaterialiseGalaxyTags`, and values via the `DriverHostActor` SubscribeBulk pass
|
||||
(`b1b3f3f`).
|
||||
|
||||
**Doesn't work — Equipment namespace:**
|
||||
Deploying an `Equipment` namespace + a `UnsArea`/`UnsLine`/`Equipment` + one `VirtualTag` produced:
|
||||
|
||||
```
|
||||
Phase7Applier: hierarchy materialised (areas=1, lines=1, equipment=1) ← folders only
|
||||
```
|
||||
|
||||
…and the equipment node had **zero child variables** — the VirtualTag never materialised. There
|
||||
was no equipment-tag/virtual-tag log line at all.
|
||||
|
||||
---
|
||||
|
||||
## 3. Root cause (precise)
|
||||
|
||||
Three gaps, in order of how fundamental they are:
|
||||
|
||||
### 3.1 `EquipmentNodeWalker` is built + tested but never wired (the core gap)
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/EquipmentNodeWalker.cs` — `Walk(IAddressSpaceBuilder, EquipmentNamespaceContent)`
|
||||
materialises every `Equipment` row as a folder and every Equipment-bound `Tag` / `VirtualTag` /
|
||||
`ScriptedAlarm` as a variable (NodeId = `DriverAttributeInfo.FullName`, i.e. the tag's
|
||||
`TagConfig.FullName`; VirtualTag uses its `VirtualTagId`).
|
||||
- **The only call sites are in `tests/Core/.../EquipmentNodeWalkerTests.cs`.** Nothing in `src`
|
||||
ever calls `EquipmentNodeWalker.Walk`, and nothing builds its input record
|
||||
`EquipmentNamespaceContent` (the record exists; no producer exists).
|
||||
- The live rebuild — `src/Server/.../Runtime/OpcUa/OpcUaPublishActor.cs:HandleRebuild` — calls
|
||||
`MaterialiseHierarchy` (the Area/Line/Equipment *folders*) and `MaterialiseGalaxyTags`
|
||||
(SystemPlatform only). It never calls `EquipmentNodeWalker`.
|
||||
- `OtOpcUaNodeManager` references `EquipmentNodeWalker` only in a header comment — not in
|
||||
`CreateAddressSpace`.
|
||||
|
||||
### 3.2 The deployment composition/artifact drops equipment signals
|
||||
|
||||
- `Phase7CompositionResult` (`src/Server/.../OpcUaServer/Phase7Composer.cs`) carries
|
||||
`UnsAreas`, `UnsLines`, `EquipmentNodes` *(EquipmentId, DisplayName, UnsLineId — no tags)*,
|
||||
`DriverInstancePlans`, `ScriptedAlarmPlans`, `GalaxyTags`. **There is no equipment-tag or
|
||||
virtual-tag list.**
|
||||
- `DeploymentArtifact.ParseComposition` (`src/Server/.../Runtime/Drivers/DeploymentArtifact.cs`)
|
||||
reads the artifact's `Tags` array but **`BuildGalaxyTagPlans` explicitly skips any tag with a
|
||||
non-null `EquipmentId`** (line ~176). So even though equipment tags are serialised into the
|
||||
artifact, the composition the node consumes throws them away.
|
||||
|
||||
So even if `EquipmentNodeWalker` were wired into `HandleRebuild`, it would have no equipment-tag
|
||||
data to walk.
|
||||
|
||||
### 3.3 No value source for Equipment-namespace signals
|
||||
|
||||
Equipment signals can be valued two ways; **neither is currently wired**:
|
||||
|
||||
- **Driver-sourced `Tag` (e.g. an OPC UA Client remap of the Galaxy mirror):** the **OpcUaClient
|
||||
driver has no factory registration** — `DriverFactoryBootstrap.Register`
|
||||
(`src/Server/.../Host/Drivers/DriverFactoryBootstrap.cs`) wires AbCip/AbLegacy/FOCAS/Galaxy/
|
||||
Modbus/S7/TwinCAT but **not OpcUaClient**, so an `OpcUaClient` `DriverInstance` silently stubs →
|
||||
no values. (The driver itself, implementing `IDriver`, `ITagDiscovery`, `IReadable`, `IWritable`, `ISubscribable`, …,
|
||||
exists and is otherwise complete; only the factory `Register` is missing.)
|
||||
- **`VirtualTag` (script mirrors a live tag):** the `DependencyMuxActor` + a real
|
||||
`IVirtualTagEvaluator` are registered (`Runtime/ServiceCollectionExtensions.cs:100`,
|
||||
`Host/Program.cs:102`), and driver values now reach the mux (`DriverHostActor.ForwardToMux`,
|
||||
`b1b3f3f`). But `VirtualTagContext.GetTag` reads from a per-evaluation cache fed by an
|
||||
`ITagUpstreamSource`, and **no concrete `ITagUpstreamSource` is registered in the Host** — and
|
||||
it is unverified whether a `VirtualTag` in an `Equipment` namespace can resolve a tag that lives
|
||||
in the `SystemPlatform` namespace (cross-namespace `ctx.GetTag("/TestMachine_001/…")`).
|
||||
|
||||
---
|
||||
|
||||
## 4. Goal / acceptance criteria
|
||||
|
||||
A deploy that includes an `Equipment`-kind namespace results in, on `opc.tcp://…:4840`:
|
||||
|
||||
1. **Structure:** browsable `…/<area>/<line>/<equipment>/<signal>` for every `UnsArea`/`UnsLine`/
|
||||
`Equipment`/`Tag`(+`VirtualTag`,`ScriptedAlarm`) row — folders **browse-named by their friendly
|
||||
`Name`**, not their logical Id (see §6.4).
|
||||
2. **Values:** each signal carries a live `Good` value (driver-sourced and/or VirtualTag-derived).
|
||||
3. **Reload-safe:** survives a node restart with no re-deploy (must run on the `RestoreApplied`
|
||||
bootstrap path added in `b1b3f3f`, not just on a fresh apply).
|
||||
4. **Verifiable headlessly:** the `scadaproj/otopcua-uns-loader` tool's `verify` passes against the
|
||||
company-shape namespace (extend `verify` to browse the Equipment tree).
|
||||
|
||||
---
|
||||
|
||||
## 5. Workstreams
|
||||
|
||||
### WS-1 — Carry equipment signals through the composition (foundational)
|
||||
Extend `Phase7CompositionResult` with equipment `Tag`/`VirtualTag`/`ScriptedAlarm` plans (or reuse
|
||||
`EquipmentNamespaceContent`), populate them in `Phase7Composer.Compose`, serialise them in the
|
||||
deployment artifact, and parse them in `DeploymentArtifact.ParseComposition` (stop discarding
|
||||
`EquipmentId != null` tags). **Risk: medium** (touches composer + artifact format + planner; needs
|
||||
a format-compat story for already-sealed artifacts). **Effort: ~1–2 days.**
|
||||
|
||||
### WS-2 — Materialise equipment signals in the live rebuild (wire the existing component)
|
||||
In `OpcUaPublishActor.HandleRebuild`, after `MaterialiseHierarchy`, build `EquipmentNamespaceContent`
|
||||
from the composition and call the already-tested `EquipmentNodeWalker.Walk`. Make it idempotent and
|
||||
diff-aware to match the existing Galaxy-tag pass. **Risk: low–medium** (component is tested; this is
|
||||
wiring + an idempotency pass). **Effort: ~0.5–1 day.**
|
||||
|
||||
### WS-3 — Value path (pick one or both; see §6.1)
|
||||
- **3a VirtualTag route:** register a concrete `ITagUpstreamSource` in the Host that bridges the
|
||||
`DependencyMuxActor`'s tag values into the VirtualTag read-cache; confirm/enable cross-namespace
|
||||
`ctx.GetTag` resolution (Equipment VirtualTag reading a SystemPlatform mirror tag) and the
|
||||
dependency-graph re-evaluation trigger. **Risk: high** (cross-namespace resolution + dependency
|
||||
tracking are unproven end-to-end). **Effort: ~2–3 days.**
|
||||
- **3b OpcUaClient route:** write+register `OpcUaClientDriverFactoryExtensions.Register` and add it
|
||||
to `DriverFactoryBootstrap.Register`; extend the SubscribeBulk pass to also subscribe
|
||||
Equipment-namespace `Tag` refs (`TagConfig.FullName`; NodeId == FullName, so the existing
|
||||
`ForwardToMux` value routing already applies — a ~30-line generalisation prototyped and reverted
|
||||
this session); decide the self-referential endpoint topology (a MAIN driver node OPC-UA-clienting
|
||||
into its own `:4840` Galaxy mirror, vs a second cluster). **Risk: high** (unfinished driver +
|
||||
self-loop topology). **Effort: ~2–4 days.**
|
||||
|
||||
### WS-4 — Browse-name fix (cosmetic but required for a usable shape)
|
||||
Today the UNS folder browse name is the logical **Id** (observed `nw-area-filling`), not the
|
||||
friendly `Name` (`filling`). Confirm whether `Phase7Applier.MaterialiseHierarchy` /
|
||||
`EquipmentNode.DisplayName` should use `UnsArea.Name`/`UnsLine.Name`/`Equipment.Name` for the
|
||||
BrowseName (keeping the Id as the NodeId). **Risk: low.** **Effort: ~0.5 day.**
|
||||
|
||||
### WS-5 — Tests + headless verification
|
||||
Unit: composer carries equipment signals; `HandleRebuild` materialises them; round-trip artifact
|
||||
parse. Integration: a docker-dev deploy of a small Equipment namespace browses + reads `Good`.
|
||||
Extend `otopcua-uns-loader verify` to assert the Equipment tree. **Effort: ~1 day.**
|
||||
|
||||
---
|
||||
|
||||
## 6. Design decisions / open questions
|
||||
|
||||
1. **VirtualTag (3a) vs OpcUaClient (3b) for live values.** VirtualTags reuse the live Galaxy
|
||||
mirror in-process (no second OPC UA session) but lean on unproven cross-namespace script
|
||||
resolution; OpcUaClient is the documented "remote-equipment → UNS" pattern but needs an
|
||||
unfinished driver factory and a self-referential session. **Recommendation:** prototype 3a first
|
||||
(smaller surface, no new driver), fall back to 3b if cross-namespace resolution proves
|
||||
intractable. A structure-only milestone (WS-1/2/4, no values) is independently shippable.
|
||||
2. **Cross-namespace `ctx.GetTag`.** Does an Equipment-namespace VirtualTag resolve a
|
||||
SystemPlatform Galaxy tag by browse path (`/TestMachine_001/TestChangingInt`) or by reference
|
||||
(`TestMachine_001.TestChangingInt`)? Determines the script-authoring contract. Must be settled
|
||||
before WS-3a.
|
||||
3. **Artifact format compatibility.** Adding equipment signals to the artifact changes its shape;
|
||||
ensure older sealed artifacts still parse (the parser is tolerant today — keep it so).
|
||||
4. **Browse-name source** (WS-4) — `Name` vs `Id`. Picking `Name` makes the company shape readable;
|
||||
confirm nothing keys off the Id-as-BrowseName.
|
||||
|
||||
---
|
||||
|
||||
## 7. Recommended sequencing
|
||||
|
||||
1. **WS-1 + WS-2 + WS-4 (structure-only):** Equipment namespaces browse the real
|
||||
`…/area/line/equipment/signal` shape with `BadWaitingForInitialData` leaves. Independently
|
||||
shippable; de-risks the composition/materialisation half.
|
||||
2. **WS-3a (VirtualTag values):** lights the structure up by mirroring the live Galaxy tags.
|
||||
3. **WS-3b (OpcUaClient driver):** only if a true remote-equipment driver path is wanted beyond the
|
||||
Galaxy mirror.
|
||||
4. **WS-5** throughout.
|
||||
|
||||
**Rough total:** structure-only ≈ 2–3.5 days; +VirtualTag values ≈ +2–3 days.
|
||||
|
||||
## 8. Out of scope
|
||||
- Authoring the company UNS rows (the `scadaproj/otopcua-uns-loader` tool already generates them
|
||||
from `company-uns.json`).
|
||||
- Any change to the SystemPlatform/Galaxy path, which works.
|
||||
- The AdminUI UNS editor.
|
||||
|
||||
## 9. Key references
|
||||
- Works (reference): `OpcUaPublishActor.HandleRebuild` + `Phase7Applier.MaterialiseGalaxyTags`;
|
||||
SubscribeBulk in `DriverHostActor` (commits `c1ce583`, `b1b3f3f`).
|
||||
- Built-but-unwired: `Core/OpcUa/EquipmentNodeWalker.cs` (+ `EquipmentNamespaceContent`),
|
||||
tested only by `EquipmentNodeWalkerTests.cs`.
|
||||
- Composition gap: `OpcUaServer/Phase7Composer.cs` (`Phase7CompositionResult`),
|
||||
`Runtime/Drivers/DeploymentArtifact.cs` (`BuildGalaxyTagPlans` skips `EquipmentId != null`).
|
||||
- Value gaps: `Host/Drivers/DriverFactoryBootstrap.cs` (no OpcUaClient registration);
|
||||
`Core.VirtualTags/ITagUpstreamSource.cs` (no Host registration found).
|
||||
- The consuming tool + the company model: `scadaproj/otopcua-uns-loader/`, `scadaproj/company-uns.json`.
|
||||
@@ -0,0 +1,212 @@
|
||||
# Equipment-Namespace Structure Materialization — Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** A deploy that includes an `Equipment`-kind namespace materialises its full
|
||||
`Area / Line / Equipment / Signal` browse tree into the live OPC UA address space on `:4840`,
|
||||
with friendly-`Name` browse names and `BadWaitingForInitialData` leaf values. (Live **values** are
|
||||
a separate later milestone and are explicitly out of scope here.)
|
||||
|
||||
**Architecture:** The live rebuild (`OpcUaPublishActor.HandleRebuild`) is **sink-based** — it drives
|
||||
`Phase7Applier` against an `IOpcUaAddressSpaceSink`, materialising the Area/Line/Equipment folder
|
||||
skeleton (`MaterialiseHierarchy`) and SystemPlatform/Galaxy variables (`MaterialiseGalaxyTags`).
|
||||
Today there is **no equipment-signal pass**: `Equipment`-bound `Tag`/`VirtualTag`/`ScriptedAlarm`
|
||||
rows never become variables. This plan adds that pass, mirroring `MaterialiseGalaxyTags`, fed by
|
||||
equipment data carried in the deployment composition. It also makes the UNS folders browse by their
|
||||
friendly `Name`.
|
||||
|
||||
**Tech Stack:** .NET 10, Akka.NET actors, EF Core (SQL Server), OPC UA SDK. Build/test from the repo
|
||||
root: `dotnet build`, `dotnet test`. Per-task tests live under `tests/Server/…` and `tests/Core/…`.
|
||||
|
||||
**Background (read first):** `docs/plans/2026-06-06-equipment-namespace-materialization-scope.md` —
|
||||
this plan implements its WS-1, WS-2, WS-4 (+ tests). The reference implementation to mirror is the
|
||||
Galaxy path: `Phase7Applier.MaterialiseGalaxyTags` + `OpcUaPublishActor.HandleRebuild`.
|
||||
|
||||
---
|
||||
|
||||
## Architecture decisions (resolve before/while implementing)
|
||||
|
||||
These are surfaced from the investigation; Task 0 records the chosen answers in the plan/code.
|
||||
|
||||
1. **Reuse `EquipmentNodeWalker` vs add a sink pass.** `EquipmentNodeWalker.Walk` is fully built +
|
||||
unit-tested but writes to an `IAddressSpaceBuilder` (the driver-discovery API), whereas the
|
||||
rebuild path writes to `IOpcUaAddressSpaceSink`. Two ways to bridge:
|
||||
- **(A, recommended) Add `Phase7Applier.MaterialiseEquipmentTags(composition)`** — sink-based,
|
||||
a near-copy of `MaterialiseGalaxyTags`, iterating equipment tags and calling
|
||||
`_sink.EnsureFolder` / `_sink.EnsureVariable`. Consistent with the rest of the rebuild; no
|
||||
adapter. Downside: re-expresses some grouping logic the walker already has.
|
||||
- **(B) Adapt `EquipmentNodeWalker` via a sink-backed `IAddressSpaceBuilder`.** Check for an
|
||||
existing capturing builder (`GenericDriverNodeManager.CapturingBuilder`,
|
||||
`src/Core/…/Core/OpcUa/GenericDriverNodeManager.cs`); if one cleanly wraps the sink, call
|
||||
`EquipmentNodeWalker.Walk(capturingBuilder, content)` and reuse the tested logic. Downside:
|
||||
couples the rebuild to the driver-builder API + that adapter.
|
||||
**Recommendation:** spend up to 20 min up front (Task 0, Step 1) confirming whether a sink→builder adapter
|
||||
exists and is cheap. If yes → B (reuse the tested walker). If not → A. This plan is written for
|
||||
**A** (lower coupling, self-contained); swap the Task 2 body for B if the adapter is clean.
|
||||
|
||||
2. **Where equipment data comes from at rebuild: artifact vs live DB.** `MaterialiseGalaxyTags` uses
|
||||
the sealed-artifact composition. For consistency and snapshot-correctness, carry equipment data
|
||||
in the composition too (Task 1). A pragmatic alternative with precedent (the `b1b3f3f` SubscribeBulk
|
||||
pass queries the live DB) is to load `EquipmentNamespaceContent` directly from the DB in the
|
||||
rebuild — simpler, but live-DB-vs-sealed-artifact can diverge. **This plan carries it in the
|
||||
composition (the correct, consistent choice).**
|
||||
|
||||
3. **Folder NodeId vs BrowseName.** Keep the existing scheme: **NodeId = logical Id**
|
||||
(`UnsAreaId`/`UnsLineId`/`EquipmentId`) so browse-path resolution + ACLs are unaffected; set the
|
||||
**BrowseName/DisplayName = friendly `Name`** (Task 3). `MaterialiseHierarchy` already keys NodeId
|
||||
on the Id and displays `DisplayName`; the bug is that `DisplayName` is currently populated with
|
||||
the Id. The fix is in the composer (Task 3), not the applier.
|
||||
|
||||
4. **No double-materialisation.** `MaterialiseHierarchy` already creates the Area/Line/Equipment
|
||||
folders. The new equipment-tag pass must only add the **variables** under existing equipment
|
||||
folders (and any per-tag `FolderPath` sub-folders) — it must NOT re-create the equipment folders.
|
||||
|
||||
---
|
||||
|
||||
## Task 0: Confirm signatures + record the architecture decisions
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (do first)
|
||||
|
||||
**Files:**
|
||||
- Read: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` (`MaterialiseGalaxyTags`, `MaterialiseHierarchy`, `SafeEnsureFolder`, the `_sink` API)
|
||||
- Read: `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IOpcUaAddressSpaceSink.cs` (exact `EnsureFolder`/`EnsureVariable` signatures)
|
||||
- Read: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` (`Phase7CompositionResult`, `Compose`, how `EquipmentNode.DisplayName` + galaxy tags are built)
|
||||
- Read: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs` (`ParseComposition`, `BuildGalaxyTagPlans`)
|
||||
- Read: `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/EquipmentNodeWalker.cs` (the tested logic to mirror: `AddTagVariable`, identifier properties)
|
||||
|
||||
**Step 1:** Decide A vs B (decision #1) — grep for `CapturingBuilder` / `IAddressSpaceBuilder`
|
||||
implementations that wrap `IOpcUaAddressSpaceSink`. If a clean adapter exists, note "Task 2 uses B".
|
||||
**Step 2:** Confirm the sink's `EnsureVariable` signature (NodeId, parent, displayName,
|
||||
`DriverAttributeInfo` incl. `FullName` + `DataType`) — `MaterialiseGalaxyTags` is the template.
|
||||
**Step 3:** Record the confirmed decisions as a comment block at the top of the new
|
||||
`MaterialiseEquipmentTags` once Task 2 creates it; Task 0 itself makes no code/test change.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Carry equipment signals in the deployment composition + artifact
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (Task 2 depends on it)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — add an `EquipmentTagPlan` list to `Phase7CompositionResult`; populate it in `Compose` from `Tag` rows where `EquipmentId != null` AND the tag's driver's namespace `Kind == Equipment` (the inverse of the galaxy filter). Set `DisplayName = Name` on Area/Line/Equipment records (decision #3 / Task 3 overlaps — do the field plumbing here).
|
||||
- Modify: the artifact serializer that writes `ArtifactBlob` (find via `grep -rn "ArtifactBlob\|RevisionHash\|Serialize" src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/ConfigComposer.cs`) — emit the equipment tags (with `EquipmentId`, `FolderPath`, `Name`, `DataType`, `DriverInstanceId`, `TagConfig.FullName`) into the `Tags` array (they are likely already there) and ensure Area/Line/Equipment friendly `Name`s are serialised.
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs` — add `BuildEquipmentTagPlans(root, drivers)`: the mirror of `BuildGalaxyTagPlans` that KEEPS `EquipmentId != null` tags whose namespace `Kind == Equipment`, reading `FullName` from `TagConfig`. Wire it into `ParseComposition`.
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs` (or the existing composition test file).
|
||||
|
||||
**Step 1 — failing test:** add a test that round-trips an artifact containing one Equipment
|
||||
namespace + one equipment `Tag` and asserts `ParseComposition(...).EquipmentTags` contains it with
|
||||
the right `EquipmentId`, `FullName`, `DataType`. Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter EquipmentTag` → FAIL (member missing).
|
||||
**Step 2 — implement** the `EquipmentTagPlan` record + populate in composer + parse in artifact.
|
||||
**Step 3 — run** the test → PASS, plus the full `Runtime.Tests` + `OpcUaServer.Tests` suites green.
|
||||
**Step 4 — commit:** `feat(opcua): carry Equipment-namespace tags through the deployment composition`.
|
||||
|
||||
**Design note:** `EquipmentNamespaceContent` (the `EquipmentNodeWalker` input) uses full entity
|
||||
types. If Task 2 chooses option B, `EquipmentTagPlan` should carry enough to reconstruct the
|
||||
`Tag`/`Equipment` fields the walker reads (`Name`, `FolderPath`, `EquipmentId`, `DataType`,
|
||||
`FullName`). For option A, a flat `EquipmentTagPlan(EquipmentId, DriverInstanceId, FolderPath, Name, DataType, FullName)` is enough.
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Materialise equipment signals in the live rebuild
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Task 1)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — add `MaterialiseEquipmentTags(Phase7CompositionResult composition)`, a sink-based near-copy of `MaterialiseGalaxyTags`: for each `EquipmentTagPlan`, ensure its `FolderPath` sub-folder (if any) **under the existing equipment folder** (`parentNodeId = EquipmentId` or the sub-folder), then `EnsureVariable(nodeId: FullName, parent, displayName: Name, attributeInfo: new DriverAttributeInfo(FullName, DataType, …))`. Log `equipment tags materialised (tags=N, equipment=M)`.
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs:HandleRebuild` — after `MaterialiseGalaxyTags(composition)`, call `_applier.MaterialiseEquipmentTags(composition)`.
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/Phase7ApplierHierarchyTests.cs` (sibling to the existing hierarchy test, which mocks `IOpcUaAddressSpaceSink`).
|
||||
|
||||
**Step 1 — failing test:** with a fake sink, call `MaterialiseEquipmentTags` on a composition with
|
||||
one equipment tag and assert one `EnsureVariable(nodeId == FullName, parent == EquipmentId, displayName == Name)` call landed. Run filtered test → FAIL (method missing).
|
||||
**Step 2 — implement** `MaterialiseEquipmentTags` (mirror `MaterialiseGalaxyTags`; reuse
|
||||
`SafeEnsureFolder`; idempotent via the same dedupe the galaxy pass uses) **and** the
|
||||
`HandleRebuild` wire-up.
|
||||
**Step 3 — run** the new test + `OpcUaServer.Tests` + `Runtime.Tests` → PASS.
|
||||
**Step 4 — commit:** `feat(opcua): materialise Equipment-namespace tags in the live rebuild`.
|
||||
|
||||
**If Task 0 chose option B:** instead of a new method, build `EquipmentNamespaceContent` from the
|
||||
composition, obtain the sink-backed `IAddressSpaceBuilder`, and call `EquipmentNodeWalker.Walk`.
|
||||
Keep the same `HandleRebuild` call site + test assertions.
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Friendly browse names for UNS folders
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (verify after Task 1, which plumbs `DisplayName`)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — ensure the composition's
|
||||
Area/Line/Equipment records carry `DisplayName = <row>.Name` (not the logical Id). `MaterialiseHierarchy`
|
||||
already passes `DisplayName` to the sink as the folder browse name, so this is the only change needed.
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/Phase7ApplierHierarchyTests.cs` — assert
|
||||
the mocked sink's `EnsureFolder` is called with `displayName == "filling"` (Name) while `nodeId == "nw-area-filling"` (Id).
|
||||
|
||||
**Step 1 — failing test** asserting DisplayName == Name, NodeId == Id. Run → FAIL (currently DisplayName == Id).
|
||||
**Step 2 — implement** the composer change.
|
||||
**Step 3 — run** → PASS.
|
||||
**Step 4 — commit:** `fix(opcua): UNS folders browse by friendly Name, NodeId stays the logical Id`.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Idempotency + restart-safety
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (after Task 2)
|
||||
|
||||
**Files:**
|
||||
- Read/verify: `OpcUaPublishActor.HandleRebuild` runs on both the apply path and the
|
||||
`DriverHostActor.RestoreApplied` bootstrap path (added in `b1b3f3f`) — so the new pass is already
|
||||
restart-covered. Confirm by inspection; no code change expected.
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/Phase7ApplierHierarchyTests.cs` — call
|
||||
`MaterialiseEquipmentTags` twice with the same composition and assert no duplicate `EnsureVariable`
|
||||
(idempotent), matching the galaxy pass's dedupe behaviour.
|
||||
|
||||
**Step 1 — failing test** (double-apply → single variable). **Step 2 — fix** dedupe if needed.
|
||||
**Step 3 — run** → PASS. **Step 4 — commit:** `test(opcua): equipment-tag materialisation is idempotent`.
|
||||
|
||||
---
|
||||
|
||||
## Task 5: docker-dev integration verification + tool support
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (last; needs Tasks 1–3 deployed)
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/EquipmentNamespaceMaterializationTests.cs`
|
||||
(model on the existing `DriverReconnectE2eTests.cs` / phase-7 smoke) — seed a 1-area/1-line/1-equipment/1-tag
|
||||
Equipment namespace + a Modbus FK driver, apply a deployment, browse `…/filling/line-1/<eq>/<signal>`,
|
||||
assert the variable exists with `BadWaitingForInitialData` (structure-only).
|
||||
- Modify (in the `scadaproj` repo, not OtOpcUa): `scadaproj/otopcua-uns-loader/otopcua_uns.py` —
|
||||
add a `verify` branch that browses the Equipment tree (friendly names) and asserts the leaf count
|
||||
matches the loaded equipment tags. (Tracked here for completeness; commit in scadaproj.)
|
||||
|
||||
**Step 1 — write** the integration test (skip-guarded if it needs live infra, per the repo's other
|
||||
integration tests). **Step 2 — run** it against docker-dev (`docs/v2/implementation/phase-7-e2e-smoke.md`
|
||||
has the harness). **Step 3 — manual confirm** via the AdminUI Deploy at `:9200` + an asyncua browse.
|
||||
**Step 4 — commit:** `test(opcua): e2e Equipment-namespace structure materialisation`.
|
||||
|
||||
---
|
||||
|
||||
## Verification (whole milestone)
|
||||
|
||||
After all tasks: deploy an Equipment namespace via `scadaproj/otopcua-uns-loader` (extend it to emit
|
||||
Equipment rows) + the AdminUI Deploy, then browse `:4840`:
|
||||
- `OtOpcUa/filling/line-1/<equipment>/<signal>` exists, folders browse-named `filling` / `line-1` / …
|
||||
- leaf variables read `BadWaitingForInitialData` (values are the next milestone).
|
||||
- A node restart auto-restores the tree (via `RestoreApplied`) with no re-deploy.
|
||||
|
||||
## Out of scope (explicit)
|
||||
- **Live values** for equipment signals (driver subscribe / VirtualTag engine / OpcUaClient factory) —
|
||||
the next milestone (scope doc §5 WS-3).
|
||||
- The Galaxy/SystemPlatform path (works).
|
||||
- The AdminUI UNS editor.
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-06-equipment-namespace-structure-milestone.md",
|
||||
"scopeDoc": "docs/plans/2026-06-06-equipment-namespace-materialization-scope.md",
|
||||
"branch": "feat/equipment-namespace-structure",
|
||||
"tasks": [
|
||||
{"id": 0, "nativeTaskId": 86, "subject": "Task 0: Confirm signatures + record architecture decisions", "status": "completed", "blockedBy": []},
|
||||
{"id": 1, "nativeTaskId": 87, "subject": "Task 1: Carry equipment signals in the composition + artifact", "status": "completed", "blockedBy": [86]},
|
||||
{"id": 2, "nativeTaskId": 88, "subject": "Task 2: Materialise equipment signals in the live rebuild", "status": "completed", "blockedBy": [87]},
|
||||
{"id": 3, "nativeTaskId": 89, "subject": "Task 3: Friendly browse names for UNS folders", "status": "completed", "blockedBy": [87]},
|
||||
{"id": 4, "nativeTaskId": 90, "subject": "Task 4: Idempotency + restart-safety", "status": "completed", "blockedBy": [88]},
|
||||
{"id": 5, "nativeTaskId": 91, "subject": "Task 5: docker-dev integration verification + tool support", "status": "completed", "blockedBy": [88, 89]}
|
||||
],
|
||||
"lastUpdated": "2026-06-06"
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
# Per-ClusterId Scoping (hub-and-spoke single mesh) — Design
|
||||
|
||||
**Date:** 2026-06-07
|
||||
**Status:** Approved (brainstorming complete; next step: writing-plans)
|
||||
**Branch:** `feat/per-cluster-scoping`
|
||||
|
||||
## Goal
|
||||
|
||||
Let one **central** cluster's Admin UI manage and deploy to multiple
|
||||
logically-separate clusters that share a single Akka mesh. The central cluster
|
||||
runs 2 fused `admin,driver` nodes (the only UI + the only deploy singleton);
|
||||
each site cluster runs 2 `driver`-only nodes. A single global deploy from the
|
||||
central UI reaches every node, and **each node applies only the slice of the
|
||||
configuration that belongs to its own `ClusterId`** — its drivers and its OPC UA
|
||||
address space. Ship global deploy first; per-cluster deploy is a later follow-up.
|
||||
|
||||
## Why this needs runtime work
|
||||
|
||||
The deploy channel is **in-mesh**: AdminUI → `admin-operations` singleton →
|
||||
`ConfigPublishCoordinator` → DistributedPubSub → driver nodes. DistributedPubSub
|
||||
does not cross Akka mesh boundaries, so for the central UI to deploy to site
|
||||
servers the site nodes **must join the central mesh**. But the runtime currently
|
||||
assumes **one Akka mesh == one logical cluster**:
|
||||
|
||||
- `DriverHostActor.ReconcileDrivers` spawns **every** `DriverInstance` in the
|
||||
artifact with no cluster filter (`DriverHostActor.cs:367`). The `ClusterId` on
|
||||
a spec is used only to *label* health snapshots.
|
||||
- `ConfigPublishCoordinator.DiscoverDriverNodes` broadcasts to **every** driver
|
||||
member of the mesh, no `ClusterId` filter (`ConfigPublishCoordinator.cs:248`).
|
||||
- `ConfigComposer.SnapshotAndFlattenAsync` snapshots **all** clusters' rows into
|
||||
one flat artifact; the address space is built from the whole thing.
|
||||
|
||||
Consequence today: put MAIN + SITE-A + SITE-B nodes in one mesh and every node
|
||||
spawns every cluster's drivers (Galaxy auto-stubs on Linux, so it *would* start)
|
||||
and serves a **merged** address space of all three clusters. That is why the
|
||||
existing docker-dev rig uses three isolated meshes.
|
||||
|
||||
This design adds the missing per-`ClusterId` scoping so a shared mesh behaves as
|
||||
distinct logical clusters.
|
||||
|
||||
## Approach (chosen: A — node-side, parse-time filter, ClusterId from the artifact)
|
||||
|
||||
Each node resolves *its own* `ClusterId` by finding its `NodeId`
|
||||
(`_localNode.Value`, format `"host:port"`, e.g. `central-1:4053`) in the
|
||||
artifact's `ClusterNode` rows, then filters both the driver specs and the
|
||||
address-space composition to that cluster.
|
||||
|
||||
The artifact is a self-contained, consistent snapshot that already includes
|
||||
`ClusterNode` + `DriverInstance` + `Namespace` + `UnsArea` (all carrying
|
||||
`ClusterId`), so resolution needs **no extra DB query** and has no
|
||||
seal-vs-apply inconsistency window. The coordinator stays a **single broadcast**;
|
||||
every node just applies its own slice.
|
||||
|
||||
### Alternatives considered
|
||||
|
||||
- **B — control-plane per-node artifact slices.** `ConfigComposer` emits a
|
||||
filtered artifact per cluster and the coordinator dispatches the right slice to
|
||||
each node. Rejected: turns one broadcast into per-cluster dispatch (a large
|
||||
change to the deploy/ack model), contradicts "ship global first," and still
|
||||
needs the same transitive `ClusterId` resolution.
|
||||
- **C — runtime DB lookup for ClusterId.** Node queries `ClusterNode` by its
|
||||
address at apply time, then filters post-parse. Rejected: extra DB round-trip
|
||||
per node per deploy and a seal-vs-apply inconsistency window; the artifact
|
||||
already contains everything A needs.
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Self-`ClusterId` resolution
|
||||
|
||||
New helper `DeploymentArtifact.ParseClusterScope(blob, nodeId)` returning
|
||||
`(string? ClusterId, int ClusterCount)`:
|
||||
- `ClusterId` = the `ClusterId` of the `ClusterNode` row whose `NodeId == nodeId`, else `null`.
|
||||
- `ClusterCount` = number of `ServerCluster` rows in the artifact.
|
||||
|
||||
Both `DriverHostActor` and `OpcUaPublishActor` call it with `_localNode.Value`.
|
||||
|
||||
**Fallback rule (single source of truth for every filter site):**
|
||||
|
||||
| Condition | Behavior |
|
||||
|---|---|
|
||||
| `ClusterCount ≤ 1` | **Lenient — no filter** (legacy single-cluster meshes + the entire existing test suite behave exactly as today). |
|
||||
| `ClusterCount > 1` and `ClusterId` resolved | **Filter to my cluster.** |
|
||||
| `ClusterCount > 1` and `ClusterId` unresolved | **Apply nothing + log error** (a node in a multi-cluster mesh with no `ClusterNode` row is misconfigured; serving everything would leak other clusters' data). |
|
||||
|
||||
The `ClusterCount ≤ 1` lenient branch is what protects the existing ~210 v2
|
||||
tests and any single-cluster deployment from any behavior change.
|
||||
|
||||
### 2. Driver-spawn filter — `DriverHostActor`
|
||||
|
||||
`DriverInstanceSpec` already carries `ClusterId`, so in `ReconcileDrivers` (and
|
||||
the restart `RestoreServedState` path) apply a one-line predicate over the parsed
|
||||
specs using the fallback rule. In multi-cluster mode, specs with a `null`
|
||||
`ClusterId` are excluded + logged (should never occur — `ConfigComposer` always
|
||||
serializes the column).
|
||||
|
||||
### 3. Address-space filter — `ParseComposition` + `OpcUaPublishActor`
|
||||
|
||||
Add `DeploymentArtifact.ParseComposition(blob, clusterId)`. At parse time the raw
|
||||
artifact entities still carry `ClusterId` / `NamespaceId` / `UnsAreaId` /
|
||||
`DriverInstanceId`, so build in-cluster id sets from the artifact and filter every
|
||||
projection:
|
||||
|
||||
| Projection | Filter predicate |
|
||||
|---|---|
|
||||
| `UnsAreas` | `ClusterId == mine` (direct) |
|
||||
| `UnsLines` | `UnsAreaId ∈ myAreas` |
|
||||
| `EquipmentNodes` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `DriverInstancePlans` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `GalaxyTags` / `EquipmentTags` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `ScriptedAlarmPlans` | `EquipmentId ∈ myEquipment` |
|
||||
|
||||
`OpcUaPublishActor.HandleRebuild` resolves `myClusterId` and calls the filtered
|
||||
parse before `Phase7Planner.Compute`. `_lastApplied` becomes the filtered
|
||||
composition, so the incremental diff stays correct across redeploys. The no-arg
|
||||
`ParseComposition(blob)` is left untouched (legacy / single-cluster path).
|
||||
|
||||
### 4. Deploy ack / convergence
|
||||
|
||||
`ConfigPublishCoordinator` keeps broadcasting to all driver members and waiting
|
||||
for all acks (in the new rig all 6 nodes are driver-role). Each node applies its
|
||||
slice and acks — **including a node whose cluster has an empty slice**. The one
|
||||
risk: the ack must fire even when the node's plan is empty. Implementation will
|
||||
**verify the ack is unconditional** and add a small fix if it is currently gated
|
||||
on a non-empty change set. No change to `DiscoverDriverNodes`.
|
||||
|
||||
### 5. docker-dev compose + seed rewrite
|
||||
|
||||
- **compose:** remove `admin-a` / `admin-b` / `driver-a` / `driver-b`; add
|
||||
`central-1` / `central-2` (`OTOPCUA_ROLES=admin,driver`, seed = `central-1`,
|
||||
OPC UA `4840` / `4841`, ASPNETCORE UI on `:9000`). `site-a-1/2`, `site-b-1/2`
|
||||
become `driver`-only (`OTOPCUA_ROLES=driver`, `Cluster__Roles__0=driver`, seed
|
||||
→ `central-1`, OPC UA `4842`–`4845`), dropping their UI / Jwt / Ldap /
|
||||
DeployApiKey env + Traefik exposure. All nodes share the one ConfigDb.
|
||||
- **traefik:** single `PathPrefix(/)` router → `central-1` / `central-2`
|
||||
(sticky cookie); drop the two site routers + services in both
|
||||
`docker-compose.yml` and `traefik-dynamic.yml`.
|
||||
- **seed SQL (`seed/seed-clusters.sql`):** MAIN `ClusterNode` rows become
|
||||
`central-1:4053` / `central-2:4053` (replacing `driver-a` / `driver-b`);
|
||||
SITE-A / SITE-B keep their `ServerCluster` + 2 `ClusterNode` rows but **no
|
||||
drivers/tags** (empty sites). Update the `Notes` columns + the file header
|
||||
comments. The Galaxy namespace / driver / tags stay on MAIN (they run on the
|
||||
central fused nodes).
|
||||
- **compose header + comment blocks:** rewrite the topology description (single
|
||||
mesh, hub-and-spoke, central-only UI).
|
||||
|
||||
## Data flow (after the change)
|
||||
|
||||
1. Operator clicks **Deploy** in the central UI (or `POST /api/deployments`).
|
||||
2. `admin-operations` singleton (on a central node) → `ConfigComposer` snapshots
|
||||
**all** clusters' rows into one artifact → `ConfigPublishCoordinator`
|
||||
broadcasts `DispatchDeployment` to **all** driver members.
|
||||
3. Each node resolves its own `ClusterId` from the artifact's `ClusterNode` rows.
|
||||
4. `DriverHostActor` spawns only its cluster's `DriverInstance`s.
|
||||
5. `OpcUaPublishActor` materialises only its cluster's address space.
|
||||
6. Every node acks; the coordinator seals the deployment when all acks arrive.
|
||||
7. Result: central `:4840`/`:4841` serve MAIN's Galaxy tree; site
|
||||
`:4842`–`:4845` serve only their own (empty until configured) trees.
|
||||
|
||||
## Error handling
|
||||
|
||||
- **Misconfigured node** (multi-cluster mesh, no matching `ClusterNode` row):
|
||||
applies nothing, logs an error, still acks (so the deploy converges rather than
|
||||
hanging). Surfaced for the operator to add the missing `ClusterNode` row.
|
||||
- **Pre-PR / single-cluster artifacts:** `ClusterCount ≤ 1` → lenient no-filter,
|
||||
identical to current behavior.
|
||||
- **Empty cluster slice:** node applies an empty plan and acks normally.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit:** `ParseClusterScope` (match / miss / count); `ParseComposition(blob,
|
||||
clusterId)` (cross-cluster projections excluded; transitive resolution for
|
||||
UnsLine / Equipment / Tag / ScriptedAlarm); the driver-spec filter predicate
|
||||
(lenient / strict / unresolved-strict).
|
||||
- **Integration:** a 2-cluster scoping test on the in-process harness — two
|
||||
driver nodes assigned to different `ClusterId`s, one deploy, assert each spawns
|
||||
only its cluster's drivers and materialises only its cluster's tree.
|
||||
- **Backward-compat:** the existing single-cluster suites must stay green (the
|
||||
`ClusterCount ≤ 1` lenient branch guarantees this).
|
||||
- **Live (docker-dev rig):** bring the rig up, sign into the central UI, confirm
|
||||
3 clusters listed, deploy, confirm `:4840` shows the Galaxy tree and
|
||||
`:4842`/`:4844` are empty (not the merged tree).
|
||||
|
||||
## Classification
|
||||
|
||||
High-risk — touches the actor model, the Phase7 data contract, and the deploy
|
||||
path. The implementation plan will be TDD'd section by section.
|
||||
|
||||
## Out of scope (follow-ups)
|
||||
|
||||
- **Per-cluster deploy** (deploy just SITE-A from the UI) — global deploy ships
|
||||
first; per-cluster targeting is a later coordinator + UI enhancement.
|
||||
- **Seeding demo drivers on the sites** — sites start empty; drivers are added
|
||||
via the central UI.
|
||||
@@ -0,0 +1,825 @@
|
||||
# Per-ClusterId Scoping (hub-and-spoke single mesh) Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Let one central cluster's Admin UI deploy to multiple logically-separate
|
||||
clusters that share one Akka mesh, with each node applying only its own
|
||||
`ClusterId`'s drivers + OPC UA address space.
|
||||
|
||||
**Architecture:** Approach A — node-side, parse-time filtering. Each node resolves
|
||||
its own `ClusterId` from the deployment artifact's `ClusterNode` rows (no extra DB
|
||||
query) and filters both the driver specs and the address-space composition to that
|
||||
cluster. The coordinator stays a single broadcast; every node applies its own
|
||||
slice and acks. A single-cluster artifact is applied unfiltered, so existing
|
||||
deployments + tests are unaffected.
|
||||
|
||||
**Tech Stack:** .NET 10, Akka.NET, EF Core, `System.Text.Json` (artifact parse),
|
||||
xUnit v2 + Shouldly (Runtime.Tests uses Akka.TestKit.Xunit2), Docker Compose + Traefik.
|
||||
|
||||
**Design doc:** `docs/plans/2026-06-07-per-cluster-scoping-design.md` (approved).
|
||||
|
||||
**The fallback rule (single source of truth — implemented once in `ResolveClusterScope`):**
|
||||
- artifact has **≤1 cluster** → `None` (apply everything; legacy/single-cluster + all existing tests behave identically).
|
||||
- artifact has **>1 cluster** and the node's `ClusterNode` row is found → `ScopeTo(clusterId)`.
|
||||
- artifact has **>1 cluster** and the node's row is **not** found → `Suppress` (apply nothing + the caller logs).
|
||||
|
||||
**Hard rules (carry through every task):** never `git add .` — stage by explicit
|
||||
path; never stage `sql_login.txt` or `src/Server/.../pki/`; never echo the gateway
|
||||
API key into a *new* tracked file; never force-push or skip hooks.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: `ResolveClusterScope` + node-scoped `ParseDriverInstances`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 6, Task 7, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs`
|
||||
|
||||
**Context:** `DeploymentArtifact` is a static JSON decoder over the artifact blob
|
||||
produced by `ConfigComposer.SnapshotAndFlattenAsync`. The artifact root has
|
||||
Pascal-case arrays: `Clusters` (ServerCluster, has `ClusterId`), `Nodes`
|
||||
(ClusterNode, has `NodeId` + `ClusterId`), `DriverInstances` (has `ClusterId`),
|
||||
`Namespaces`/`UnsAreas` (have `ClusterId`), `Equipment`/`Tags`/`UnsLines`/`ScriptedAlarms`
|
||||
(no `ClusterId` — traced via `DriverInstanceId`/`UnsAreaId`/`EquipmentId`).
|
||||
`DriverInstanceSpec` already carries `ClusterId` (`DeploymentArtifact.cs:19`).
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add to `DeploymentArtifactTests.cs`. Reuse the file's existing artifact-blob
|
||||
helper if present; otherwise add this minimal builder:
|
||||
|
||||
```csharp
|
||||
private static byte[] BlobOf(object snapshot) =>
|
||||
System.Text.Json.JsonSerializer.SerializeToUtf8Bytes(snapshot);
|
||||
|
||||
private static object MultiClusterSnapshot() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "main-galaxy", Name = "g", DriverType = "GalaxyMxGateway", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "sa-modbus", Name = "m", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
```csharp
|
||||
[Fact]
|
||||
public void ResolveClusterScope_single_cluster_artifact_returns_None()
|
||||
{
|
||||
var blob = BlobOf(new { Clusters = new[] { new { ClusterId = "MAIN" } }, Nodes = Array.Empty<object>() });
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(blob, "central-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.None);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_known_node_scopes_to_its_cluster()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "site-a-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.ScopeTo);
|
||||
scope.ClusterId.ShouldBe("SITE-A");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_unknown_node_suppresses()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.Suppress);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_returns_only_my_clusters_drivers()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "central-1:4053");
|
||||
specs.Select(s => s.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_unknown_node_returns_empty()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
specs.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_single_cluster_returns_all()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "d1", Name = "d", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN" } },
|
||||
});
|
||||
DeploymentArtifact.ParseDriverInstances(blob, "anything:4053").Select(s => s.DriverInstanceId).ShouldBe(new[] { "d1" });
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the tests — verify they fail**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
Expected: FAIL — `ClusterFilterMode` / `ResolveClusterScope` / the 2-arg `ParseDriverInstances` don't exist.
|
||||
|
||||
**Step 3: Implement**
|
||||
|
||||
In `DeploymentArtifact.cs`, add the scope types just above `public static class DeploymentArtifact` (top-level, same namespace):
|
||||
|
||||
```csharp
|
||||
/// <summary>How a node should scope a deployment artifact to its own ClusterId.</summary>
|
||||
public enum ClusterFilterMode { None, ScopeTo, Suppress }
|
||||
|
||||
/// <summary>Resolved scoping decision for a node against an artifact.</summary>
|
||||
/// <param name="Mode">None = apply everything (single-cluster / legacy); ScopeTo = filter to <paramref name="ClusterId"/>; Suppress = apply nothing.</param>
|
||||
/// <param name="ClusterId">The node's ClusterId when <paramref name="Mode"/> is ScopeTo; otherwise null.</param>
|
||||
public readonly record struct ClusterScope(ClusterFilterMode Mode, string? ClusterId);
|
||||
```
|
||||
|
||||
Inside the class, add `ResolveClusterScope` and the 2-arg `ParseDriverInstances`
|
||||
overload (place after the existing `ParseDriverInstances`):
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Resolve how a node should scope a multi-cluster deployment artifact to its own logical
|
||||
/// cluster, from the same consistent snapshot it applies (the artifact's ClusterNode rows map
|
||||
/// NodeId → ClusterId; the ServerCluster count decides single- vs multi-cluster). Fallback rule:
|
||||
/// ≤1 cluster ⇒ no filter (legacy single-cluster meshes + existing tests unchanged); >1 cluster
|
||||
/// with the node's row found ⇒ scope to that ClusterId; >1 cluster with the row missing ⇒
|
||||
/// suppress (apply nothing) — a node in a multi-cluster mesh with no ClusterNode row is
|
||||
/// misconfigured and must not serve other clusters' data.
|
||||
/// </summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form (matches ClusterNode.NodeId).</param>
|
||||
/// <returns>The scoping decision for this node.</returns>
|
||||
public static ClusterScope ResolveClusterScope(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
if (blob.IsEmpty) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
var clusterCount = root.TryGetProperty("Clusters", out var cl) && cl.ValueKind == JsonValueKind.Array
|
||||
? cl.GetArrayLength() : 0;
|
||||
if (clusterCount <= 1) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
|
||||
string? myCluster = null;
|
||||
if (root.TryGetProperty("Nodes", out var nodes) && nodes.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in nodes.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var nid = el.TryGetProperty("NodeId", out var nEl) ? nEl.GetString() : null;
|
||||
if (!string.Equals(nid, nodeId, StringComparison.Ordinal)) continue;
|
||||
myCluster = el.TryGetProperty("ClusterId", out var cEl) ? cEl.GetString() : null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return string.IsNullOrWhiteSpace(myCluster)
|
||||
? new ClusterScope(ClusterFilterMode.Suppress, null)
|
||||
: new ClusterScope(ClusterFilterMode.ScopeTo, myCluster);
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
return new ClusterScope(ClusterFilterMode.None, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Cluster-scoped overload: the driver specs a node should host given its NodeId.</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form.</param>
|
||||
/// <returns>The filtered driver specs per the node's <see cref="ResolveClusterScope"/> decision.</returns>
|
||||
public static IReadOnlyList<DriverInstanceSpec> ParseDriverInstances(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
var all = ParseDriverInstances(blob);
|
||||
return scope.Mode switch
|
||||
{
|
||||
ClusterFilterMode.Suppress => Array.Empty<DriverInstanceSpec>(),
|
||||
ClusterFilterMode.ScopeTo => all.Where(
|
||||
s => string.Equals(s.ClusterId, scope.ClusterId, StringComparison.Ordinal)).ToArray(),
|
||||
_ => all,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Run the tests — verify they pass**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
Expected: PASS (new + all pre-existing DeploymentArtifact tests).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs
|
||||
git commit -m "feat(runtime): ClusterId scope resolution + node-scoped driver-spec parse"
|
||||
```
|
||||
|
||||
**Acceptance:** `ResolveClusterScope` implements the 3-branch rule; the scoped
|
||||
`ParseDriverInstances` filters per the rule; the no-arg overload is untouched.
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Node-scoped `ParseComposition` (address-space filter)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 6, Task 7, Task 8
|
||||
|
||||
**Blocked by:** Task 1 (uses `ClusterScope` / `ResolveClusterScope`, same file).
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs`
|
||||
|
||||
**Context:** `ParseComposition(blob)` returns a `Phase7CompositionResult` whose
|
||||
projections carry no `ClusterId`. Filter by building in-cluster id sets from the
|
||||
raw artifact: `DriverInstanceId`s and `UnsAreaId`s whose row's `ClusterId` matches,
|
||||
plus `EquipmentId`s whose `DriverInstanceId` is in-cluster. Then filter each
|
||||
projection (areas by `UnsAreaId`, lines by `UnsAreaId`, equipment by `EquipmentId`,
|
||||
drivers/galaxyTags/equipmentTags by `DriverInstanceId`, alarms by `EquipmentId`).
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add a `MultiClusterSnapshotWithTags()` builder (modelled on Task 1's `MultiClusterSnapshot()`,
|
||||
adding namespaces + tags so galaxy-tag filtering is exercised), then add the tests:
|
||||
|
||||
```csharp
|
||||
private static object MultiClusterSnapshotWithTags() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceId = "main-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceId = "sa-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
Namespaces = new[]
|
||||
{
|
||||
new { NamespaceId = "main-ns", ClusterId = "MAIN", Kind = 1 },
|
||||
new { NamespaceId = "sa-ns", ClusterId = "SITE-A", Kind = 1 },
|
||||
},
|
||||
Tags = new[]
|
||||
{
|
||||
new { TagId = "t-main", DriverInstanceId = "main-galaxy", EquipmentId = (string?)null, Name = "M1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
new { TagId = "t-sa", DriverInstanceId = "sa-galaxy", EquipmentId = (string?)null, Name = "S1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
},
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_keeps_only_my_clusters_drivers_and_tags()
|
||||
{
|
||||
var blob = BlobOf(MultiClusterSnapshotWithTags());
|
||||
|
||||
var main = DeploymentArtifact.ParseComposition(blob, "central-1:4053");
|
||||
main.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
main.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-main" });
|
||||
|
||||
var siteA = DeploymentArtifact.ParseComposition(blob, "site-a-1:4053");
|
||||
siteA.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "sa-galaxy" });
|
||||
siteA.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-sa" });
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_unknown_node_is_empty()
|
||||
{
|
||||
var comp = DeploymentArtifact.ParseComposition(BlobOf(MultiClusterSnapshotWithTags()), "ghost-9:4053");
|
||||
comp.GalaxyTags.ShouldBeEmpty();
|
||||
comp.DriverInstancePlans.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_single_cluster_node_id_overload_matches_legacy()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceId = "d1", DriverType = "Modbus", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "ns" } },
|
||||
});
|
||||
DeploymentArtifact.ParseComposition(blob, "anything:4053").DriverInstancePlans.Count
|
||||
.ShouldBe(DeploymentArtifact.ParseComposition(blob).DriverInstancePlans.Count);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (2-arg `ParseComposition` missing):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
|
||||
**Step 3: Implement**
|
||||
|
||||
Add to `DeploymentArtifact.cs` (after the no-arg `ParseComposition`):
|
||||
|
||||
```csharp
|
||||
/// <summary>Cluster-scoped overload: the address-space composition a node should materialise given
|
||||
/// its NodeId. Filters every projection to the node's own ClusterId (see <see cref="ResolveClusterScope"/>).</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form.</param>
|
||||
/// <returns>The filtered composition per the node's scoping decision.</returns>
|
||||
public static Phase7CompositionResult ParseComposition(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
if (scope.Mode == ClusterFilterMode.None) return ParseComposition(blob);
|
||||
if (scope.Mode == ClusterFilterMode.Suppress) return Empty();
|
||||
|
||||
var full = ParseComposition(blob);
|
||||
var sets = BuildClusterSets(blob, scope.ClusterId!);
|
||||
return new Phase7CompositionResult(
|
||||
full.UnsAreas.Where(a => sets.AreaIds.Contains(a.UnsAreaId)).ToArray(),
|
||||
full.UnsLines.Where(l => sets.AreaIds.Contains(l.UnsAreaId)).ToArray(),
|
||||
full.EquipmentNodes.Where(e => sets.EquipmentIds.Contains(e.EquipmentId)).ToArray(),
|
||||
full.DriverInstancePlans.Where(d => sets.DriverIds.Contains(d.DriverInstanceId)).ToArray(),
|
||||
full.ScriptedAlarmPlans.Where(a => sets.EquipmentIds.Contains(a.EquipmentId)).ToArray(),
|
||||
full.GalaxyTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray())
|
||||
{
|
||||
EquipmentTags = full.EquipmentTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray(),
|
||||
};
|
||||
}
|
||||
|
||||
private sealed record ClusterSets(HashSet<string> DriverIds, HashSet<string> AreaIds, HashSet<string> EquipmentIds);
|
||||
|
||||
/// <summary>Build the in-cluster id sets used to filter a composition: DriverInstanceIds + UnsAreaIds
|
||||
/// that directly carry the ClusterId, plus EquipmentIds whose DriverInstanceId is in-cluster.</summary>
|
||||
private static ClusterSets BuildClusterSets(ReadOnlySpan<byte> blob, string clusterId)
|
||||
{
|
||||
var driverIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var areaIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var equipmentIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
CollectIdsWhereCluster(root, "DriverInstances", "DriverInstanceId", clusterId, driverIds);
|
||||
CollectIdsWhereCluster(root, "UnsAreas", "UnsAreaId", clusterId, areaIds);
|
||||
// Equipment carries no ClusterId — include it when its DriverInstanceId is in-cluster.
|
||||
if (root.TryGetProperty("Equipment", out var eq) && eq.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in eq.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var di = el.TryGetProperty("DriverInstanceId", out var diEl) ? diEl.GetString() : null;
|
||||
var id = el.TryGetProperty("EquipmentId", out var idEl) ? idEl.GetString() : null;
|
||||
if (!string.IsNullOrWhiteSpace(id) && di is not null && driverIds.Contains(di))
|
||||
equipmentIds.Add(id!);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (JsonException) { /* empty sets ⇒ nothing matches ⇒ empty composition */ }
|
||||
return new ClusterSets(driverIds, areaIds, equipmentIds);
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds to <paramref name="into"/> the <paramref name="idField"/> value of every object in the
/// <paramref name="arrayName"/> array of <paramref name="root"/> whose <c>ClusterId</c> equals
/// <paramref name="clusterId"/> (ordinal comparison).
/// </summary>
/// <param name="root">Root element of the parsed artifact.</param>
/// <param name="arrayName">Name of the top-level array property to scan.</param>
/// <param name="idField">Name of the property on each array element that holds the id to collect.</param>
/// <param name="clusterId">Cluster id an element must carry to be collected.</param>
/// <param name="into">Set that receives the matching, non-blank ids.</param>
/// <remarks>
/// Lenient: a non-object root, a missing or non-array property, non-object elements,
/// non-string <c>ClusterId</c>/id values and blank ids are skipped rather than thrown on.
/// </remarks>
private static void CollectIdsWhereCluster(
    JsonElement root, string arrayName, string idField, string clusterId, HashSet<string> into)
{
    // TryGetProperty throws InvalidOperationException on anything that is not a JSON object.
    if (root.ValueKind != JsonValueKind.Object) return;
    if (!root.TryGetProperty(arrayName, out var arr) || arr.ValueKind != JsonValueKind.Array) return;

    foreach (var el in arr.EnumerateArray())
    {
        if (el.ValueKind != JsonValueKind.Object) continue;

        var cid = ReadString(el, "ClusterId");
        if (!string.Equals(cid, clusterId, StringComparison.Ordinal)) continue;

        var id = ReadString(el, idField);
        if (!string.IsNullOrWhiteSpace(id))
        {
            into.Add(id!);
        }
    }

    // Returns the named property's value only when it is a JSON string; null when the property
    // is absent or holds any other kind. JsonElement.GetString() throws InvalidOperationException
    // for non-string, non-null kinds, so the kind is checked first and such rows are skipped.
    static string? ReadString(JsonElement obj, string name) =>
        obj.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
```
|
||||
|
||||
Note: equipment is filtered via its `DriverInstanceId` (schema-guaranteed present
|
||||
for equipment-namespace rows). If a future schema allows equipment with a null
|
||||
`DriverInstanceId`, extend `BuildClusterSets` to also include equipment whose
|
||||
`UnsLineId` maps to an in-cluster `UnsArea` — out of scope here (the dev rig's
|
||||
sites are empty).
|
||||
|
||||
**Step 4: Run — verify PASS** (new + pre-existing tests):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs
|
||||
git commit -m "feat(runtime): node-scoped ParseComposition filters address space by ClusterId"
|
||||
```
|
||||
|
||||
**Acceptance:** scoped `ParseComposition` excludes cross-cluster projections;
|
||||
single-cluster + unknown-node behavior matches the rule; no-arg overload untouched.
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Wire driver-spawn + SubscribeBulk filtering into `DriverHostActor`
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 4
|
||||
|
||||
**Blocked by:** Task 1, Task 2.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs:367` (ReconcileDrivers) and `:432` (PushDesiredSubscriptions)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/` (add `DriverHostActorClusterScopeTests.cs`, or extend an existing DriverHostActor test if present)
|
||||
|
||||
**Context:** `ReconcileDrivers` (line 349) loads the artifact blob then calls
|
||||
`ParseDriverInstances(blob)`. `PushDesiredSubscriptions` (line 412) calls
|
||||
`ParseComposition(blob)`. Both run for normal applies (`ApplyAndAck`, line 311/321)
|
||||
**and** restart restore (`RestoreApplied`, line 393/395) — so changing these two
|
||||
call sites covers both paths. The ack (`SendAck`, line 314) fires unconditionally
|
||||
*before* the rebuild, so an empty/suppressed slice still acks — no ack change needed.
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
A TestKit test asserting that a driver node in a multi-cluster artifact spawns only its
|
||||
cluster's drivers, and a node whose cluster has no drivers still reaches Applied
|
||||
(acks). Model it on the existing DriverHostActor tests in the same folder (reuse
|
||||
their in-memory DbContext + DispatchDeployment plumbing — inspect a sibling test
|
||||
for the exact harness helpers). Core assertions:
|
||||
|
||||
```csharp
|
||||
// Given a sealed deployment whose artifact has 2 clusters (MAIN: 1 driver, SITE-A: 1 driver)
|
||||
// and a DriverHostActor whose _localNode = "site-a-1:4053":
|
||||
// - after DispatchDeployment, GetDiagnostics shows ONLY the SITE-A driver (not MAIN's)
|
||||
// - the node sends an Applied ApplyAck (convergence holds even though it ignored MAIN's driver)
|
||||
// And a second actor with _localNode = "central-1:4053" shows ONLY the MAIN driver.
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (node currently spawns both clusters' drivers):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DriverHostActorClusterScope"`
|
||||
|
||||
**Step 3: Implement** — two one-line call-site changes:
|
||||
|
||||
`DriverHostActor.cs:367`:
|
||||
```csharp
|
||||
// before:
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob);
|
||||
// after:
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob, _localNode.Value);
|
||||
```
|
||||
|
||||
`DriverHostActor.cs:432`:
|
||||
```csharp
|
||||
// before:
|
||||
composition = DeploymentArtifact.ParseComposition(blob);
|
||||
// after:
|
||||
composition = DeploymentArtifact.ParseComposition(blob, _localNode.Value);
|
||||
```
|
||||
|
||||
**Step 4: Run — verify PASS, then the whole Runtime suite for no regression:**
|
||||
```bash
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
```
|
||||
Expected: new test green; all pre-existing tests green (single-cluster harnesses
|
||||
hit the `None` branch → unchanged).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorClusterScopeTests.cs
|
||||
git commit -m "feat(runtime): DriverHost spawns + subscribes only its own ClusterId's drivers"
|
||||
```
|
||||
|
||||
**Acceptance:** a site node spawns only its cluster's drivers and still acks
|
||||
Applied with an empty slice; existing single-cluster tests stay green.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Wire scoped composition into `OpcUaPublishActor.HandleRebuild`
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 3
|
||||
|
||||
**Blocked by:** Task 2.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs:212` (HandleRebuild)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorRebuildTests.cs`
|
||||
|
||||
**Context:** `HandleRebuild` (line ~210) loads the artifact then calls
|
||||
`ParseComposition(artifact)` and materialises via `Phase7Applier`. `_localNode` is
|
||||
`NodeId?` (line 46) — null on legacy/dev callers, so guard for null. The existing
|
||||
`OpcUaPublishActorRebuildTests` use a fake/inspectable sink — reuse that pattern.
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```csharp
|
||||
// Build a 2-cluster artifact (MAIN galaxy tag t-main; SITE-A galaxy tag t-sa),
|
||||
// seal it as a Deployment row in the test DbContext, construct an OpcUaPublishActor
|
||||
// with _localNode = NodeId.Parse("site-a-1:4053") and an inspectable sink, send
|
||||
// RebuildAddressSpace(correlation, depId), then assert the sink received ONLY the
|
||||
// SITE-A variable/folders (t-sa) and NOT the MAIN ones (t-main).
|
||||
// Mirror with _localNode = "central-1:4053" → only MAIN.
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (sink currently gets both clusters' nodes).
|
||||
|
||||
**Step 3: Implement** — `OpcUaPublishActor.cs:212`:
|
||||
```csharp
|
||||
// before:
|
||||
var composition = DeploymentArtifact.ParseComposition(artifact);
|
||||
// after: scope to this node's ClusterId when we know our identity; legacy/dev callers (null
|
||||
// _localNode) keep the unscoped behaviour.
|
||||
var composition = _localNode is { } ln
|
||||
? DeploymentArtifact.ParseComposition(artifact, ln.Value)
|
||||
: DeploymentArtifact.ParseComposition(artifact);
|
||||
```
|
||||
|
||||
**Step 4: Run — verify PASS + full Runtime suite:**
|
||||
```bash
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
```
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorRebuildTests.cs
|
||||
git commit -m "feat(runtime): OPC UA rebuild materialises only the node's ClusterId slice"
|
||||
```
|
||||
|
||||
**Acceptance:** a node materialises only its own cluster's address space; null
|
||||
`_localNode` keeps legacy behavior; existing rebuild tests stay green.
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Multi-cluster scoping E2E on the cluster harness
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Blocked by:** Task 3, Task 4.
|
||||
|
||||
**Files:**
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/` (add `MultiClusterScopingTests.cs`)
|
||||
- Possibly Modify: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/TwoNodeClusterHarness.cs` (only if a 2-ClusterId seed helper is needed)
|
||||
|
||||
**Context:** `TwoNodeClusterHarness` boots an in-process 2-node cluster on an
|
||||
in-memory DB with a null OPC UA sink. It proves the *deploy path* end-to-end
|
||||
(compose → broadcast → apply → ack) but cannot assert a materialised tree (null
|
||||
sink). So this test asserts **driver** scoping through the real path: seed two
|
||||
`ServerCluster` rows + two `ClusterNode` rows (one per node, different `ClusterId`)
|
||||
+ one `DriverInstance` per cluster, run one deployment, and assert via each node's
|
||||
`GetDiagnostics` that each node hosts only its own cluster's driver.
|
||||
|
||||
If the harness's seed helpers can't express two clusters cleanly, that's a plan
|
||||
defect — surface it; the unit + actor tests (Tasks 1–4) already cover the scoping
|
||||
logic, and Task 9 covers the live proof.
|
||||
|
||||
**Step 1: Write the failing test** (per the context above).
|
||||
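**Step 2: Run.** This task is blocked by Tasks 3+4 and adds no production code, so the scoping logic is already in place — expect PASS on the first run (a FAIL is only observable against a checkout that predates Tasks 3+4).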
|
||||
**Step 3:** No production code — this test passes once Tasks 3+4 are in. If it
|
||||
fails, the defect is in 3/4; fix there, not here.
|
||||
**Step 4: Run — verify PASS:**
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests --filter "FullyQualifiedName~MultiClusterScoping"`
|
||||
**Step 5: Commit**
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/MultiClusterScopingTests.cs
|
||||
# add TwoNodeClusterHarness.cs ONLY if you modified it
|
||||
git commit -m "test(integration): multi-cluster deploy scopes drivers per node"
|
||||
```
|
||||
|
||||
**Acceptance:** one deploy over a 2-cluster mesh leaves each node hosting only its
|
||||
own cluster's driver.
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Rewrite `docker-dev/docker-compose.yml` for the single mesh
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 7, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/docker-compose.yml`
|
||||
|
||||
**Context:** Today MAIN is 4 nodes (`admin-a`/`admin-b` admin + `driver-a`/`driver-b`
|
||||
driver) as one mesh; SITE-A/SITE-B are 2-node fused meshes with their own seeds.
|
||||
The anchor `&otopcua-host` (currently on `admin-a`) holds the shared build + env.
|
||||
|
||||
**Steps:**
|
||||
|
||||
1. Replace the four MAIN services with **two fused nodes**:
|
||||
- `central-1` (becomes the `&otopcua-host` anchor): `OTOPCUA_ROLES: "admin,driver"`,
|
||||
`ASPNETCORE_URLS: "http://+:9000"`, `Cluster__PublicHostname: "central-1"`,
|
||||
`Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"`, `Cluster__Roles__0: "admin"`,
|
||||
`Cluster__Roles__1: "driver"`, keep all `Security__*` (Jwt/Ldap/DeployApiKey) + `GALAXY_MXGW_API_KEY`
|
||||
(keep the existing `${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_...}` default — do **not** introduce a new
|
||||
hardcoded key), `ports: ["4840:4840"]`.
|
||||
- `central-2`: same env, `Cluster__PublicHostname: "central-2"`, seed → `central-1`,
|
||||
`ports: ["4841:4840"]`, `depends_on: { sql: healthy, central-1: started }`.
|
||||
2. Convert the four site services to **driver-only**, all seeding `central-1`:
|
||||
- For each of `site-a-1`, `site-a-2`, `site-b-1`, `site-b-2`: `OTOPCUA_ROLES: "driver"`,
|
||||
`Cluster__Roles__0: "driver"` (remove the `admin` role + `Cluster__Roles__1`), keep
|
||||
`Cluster__PublicHostname` = own name, set `Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"`,
|
||||
**remove** `ASPNETCORE_URLS` + the `Security__Jwt__*` / `Security__Ldap__*` / `Security__DeployApiKey`
|
||||
block (driver-only nodes serve no UI and authenticate no users), keep the `ConnectionStrings__ConfigDb`
|
||||
+ `GALAXY_MXGW_API_KEY` lines, keep their OPC UA ports (`4842`–`4845`), and add
|
||||
`depends_on: { sql: healthy, central-1: started }`.
|
||||
3. Update `traefik.depends_on` to `[central-1, central-2]` (drop the removed services).
|
||||
4. Rewrite the header comment block (lines ~1–40) to describe the **single mesh,
|
||||
hub-and-spoke** topology: one Akka mesh seeded by `central-1`; `central-1/2`
|
||||
are `admin,driver` (the only UI + deploy singleton); `site-*` are `driver`-only
|
||||
members scoped by `ClusterId`; central UI at `:9200` manages + deploys to all.
|
||||
Keep the existing accurate notes (SQL persistence, mesh isolation note now
|
||||
describes a single mesh, headless deploy via `:9200/api/deployments`).
|
||||
|
||||
**Verify:**
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml config --quiet && echo "compose OK"
|
||||
```
|
||||
Expected: `compose OK`.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/docker-compose.yml
|
||||
git commit -m "feat(docker-dev): single-mesh hub-and-spoke (central-1/2 + driver-only sites)"
|
||||
```
|
||||
|
||||
**Acceptance:** `docker compose config` parses; central nodes fused with UI; site
|
||||
nodes driver-only seeding central; no new hardcoded API key.
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Rewrite `docker-dev/traefik-dynamic.yml` (central-only route)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 6, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/traefik-dynamic.yml`
|
||||
|
||||
**Steps:**
|
||||
1. Keep the `otopcua-admin` router (``PathPrefix(`/`)``) + service, but point its
|
||||
`loadBalancer.servers` at `http://central-1:9000` and `http://central-2:9000`.
|
||||
2. Delete the `otopcua-site-a` and `otopcua-site-b` routers **and** their services
|
||||
(driver-only sites serve no UI). Keep the sticky-cookie + `/health/active`
|
||||
healthcheck on the surviving `otopcua-admin` service.
|
||||
3. Update the file header comment to describe the single central UI route.
|
||||
|
||||
**Verify:** Task 6's `docker compose config` does **not** validate this file (the Traefik file is mounted,
|
||||
not parsed by compose) — sanity-check it's valid YAML by eye; the live bring-up in
|
||||
Task 9 is the real check.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/traefik-dynamic.yml
|
||||
git commit -m "feat(docker-dev): Traefik routes only the central cluster UI"
|
||||
```
|
||||
|
||||
**Acceptance:** one router → central-1/central-2; site routers/services removed.
|
||||
|
||||
---
|
||||
|
||||
## Task 8: Rewrite `docker-dev/seed/seed-clusters.sql` (MAIN nodes → central-1/2)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 6, Task 7
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/seed/seed-clusters.sql`
|
||||
|
||||
**Context:** The seed inserts 3 `ServerCluster` rows + 6 `ClusterNode` rows. MAIN's
|
||||
`ClusterNode` rows are currently `driver-a:4053` / `driver-b:4053`. Sites already
|
||||
have **no** drivers/tags (only `ServerCluster` + `ClusterNode`), which matches the
|
||||
"empty sites" decision — leave them empty.
|
||||
|
||||
**Steps:**
|
||||
1. Change MAIN's two `ClusterNode` inserts from `driver-a` / `driver-b` to
|
||||
`central-1` / `central-2`: `NodeId` `central-1:4053` / `central-2:4053`,
|
||||
`Host` `central-1` / `central-2`, `ApplicationUri` `urn:OtOpcUa:central-1` /
|
||||
`urn:OtOpcUa:central-2`, keep `OpcUaPort 4840`, `ServiceLevelBase` 200/150.
|
||||
Update the `IF NOT EXISTS ... WHERE NodeId = '...'` guards to the new ids.
|
||||
2. Update the MAIN `ServerCluster` `Notes` to "central-1/central-2 fused
|
||||
admin+driver — UI + deploy singleton + MAIN OPC UA publishers."
|
||||
3. Update the SITE-A/SITE-B `ServerCluster` `Notes` to "2-node driver-only,
|
||||
managed by the central cluster over the shared mesh (empty until configured)."
|
||||
4. Update the file header comment block (the `ClusterNode` map at lines ~5–7) to
|
||||
`central-1, central-2 → MAIN`.
|
||||
5. Leave the Galaxy namespace/driver/tags (MAIN) and the LDAP→role mappings
|
||||
unchanged.
|
||||
|
||||
**Verify:** SQL isn't run locally on macOS (no SQL reachable); correctness is
|
||||
confirmed by the live bring-up in Task 9 (the `cluster-seed` job runs it). Eyeball
|
||||
that every changed `NodeId`/guard is consistent.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/seed/seed-clusters.sql
|
||||
git commit -m "feat(docker-dev): seed MAIN ClusterNodes as central-1/central-2"
|
||||
```
|
||||
|
||||
**Acceptance:** MAIN `ClusterNode` rows are `central-1`/`central-2`; sites keep
|
||||
their cluster + node rows with no drivers; notes/header updated.
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Live docker-dev verification
|
||||
|
||||
**Classification:** standard (verification — no subagent review needed)
|
||||
**Estimated implement time:** ~5 min (plus container build time)
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Blocked by:** Task 3, Task 4, Task 5, Task 6, Task 7, Task 8.
|
||||
|
||||
**Files:** none (operational).
|
||||
|
||||
**Steps (run from repo root on this Mac; Docker is local):**
|
||||
1. Sync deployment + rebuild the image and bring the rig up:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml down
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
```
|
||||
2. Confirm the mesh formed and the seed ran:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml ps
|
||||
docker compose -f docker-dev/docker-compose.yml logs cluster-seed --tail=40
|
||||
```
|
||||
Expect 3 `ServerCluster` rows + 6 `ClusterNode` rows (`central-1/2`, `site-a-1/2`, `site-b-1/2`).
|
||||
3. Trigger a global deploy headlessly (no UI login needed — the deploy API is on
|
||||
the central admin nodes):
|
||||
```bash
|
||||
curl -s -X POST http://localhost:9200/api/deployments \
|
||||
-H "X-Api-Key: docker-dev-deploy-key" -H "Content-Type: application/json" \
|
||||
-d '{"createdBy":"per-cluster-verify"}'
|
||||
```
|
||||
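Expect HTTP `202` + a `deploymentId` in the response body (`curl -s` prints no status line — add `-i` or `-w '%{http_code}\n'` to see the `202`).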
|
||||
4. Confirm scoping in the driver logs — central applies the Galaxy driver, sites apply empty:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml logs central-1 | grep -iE "applied deployment|galaxy|materialis"
|
||||
docker compose -f docker-dev/docker-compose.yml logs site-a-1 | grep -iE "applied deployment|galaxy|materialis"
|
||||
```
|
||||
Expect `central-1` to materialise the MAIN Galaxy tags; `site-a-1` to apply with **no** Galaxy/MAIN nodes.
|
||||
5. Browse-check with the Client CLI:
|
||||
```bash
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- browse -u opc.tcp://localhost:4840 -r -d 4 # central: Galaxy tree
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- browse -u opc.tcp://localhost:4842 -r -d 4 # site-a: empty (no MAIN tree)
|
||||
```
|
||||
Expect the MAIN Galaxy hierarchy on `:4840` and an **empty** address space on `:4842` (NOT the merged tree).
|
||||
|
||||
**Acceptance:** the rig boots as one mesh; a single deploy populates the central
|
||||
OPC UA tree and leaves the site nodes empty (proving per-ClusterId scoping live).
|
||||
If anything regresses, stop and debug (do not paper over).
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Update docker-dev docs + memory
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 9
|
||||
|
||||
**Blocked by:** Task 6 (final topology).
|
||||
|
||||
**Files:**
|
||||
- Modify: `CLAUDE.md` (the Docker Workflow / docker-dev references that mention the cluster layout)
|
||||
- Modify: `docs/v2/dev-environment.md` (if it describes the three-isolated-mesh topology)
|
||||
- Modify: `/Users/dohertj2/.claude/projects/-Users-dohertj2-Desktop-OtOpcUa/memory/project_dev_environment.md` + `MEMORY.md` pointer
|
||||
|
||||
**Steps:**
|
||||
1. In `CLAUDE.md` and `docs/v2/dev-environment.md`, update any description of the
|
||||
docker-dev topology from "three isolated meshes / MAIN admin-a+admin-b /
|
||||
site fused" to "single mesh, hub-and-spoke: `central-1`/`central-2` fused
|
||||
admin+driver own the only UI + deploy singleton; `site-*` are driver-only
|
||||
members scoped by `ClusterId`; central UI at `:9200` deploys to all." Update
|
||||
the OPC UA endpoint list (`central-1` `:4840`, `central-2` `:4841`, sites
|
||||
`:4842`–`:4845`).
|
||||
2. Update the `project_dev_environment.md` memory's docker-dev section to match
|
||||
(node names, hub-and-spoke, "central UI deploys to all clusters"). Keep the
|
||||
one-line `MEMORY.md` pointer accurate.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add CLAUDE.md docs/v2/dev-environment.md
|
||||
git commit -m "docs(docker-dev): document single-mesh hub-and-spoke topology"
|
||||
```
|
||||
(The memory files live outside the repo — write them with the memory workflow, not git.)
|
||||
|
||||
**Acceptance:** docs + memory describe the new topology accurately.
|
||||
|
||||
---
|
||||
|
||||
## Done criteria
|
||||
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` clean.
|
||||
- `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests` green (new scoping
|
||||
tests + no regressions) and `...Host.IntegrationTests` multi-cluster test green.
|
||||
- The full pre-existing suite stays green (the `ClusterCount ≤ 1` lenient branch
|
||||
guarantees single-cluster behavior is unchanged).
|
||||
- `docker compose config` parses; the live rig (Task 9) shows central serving the
|
||||
Galaxy tree and sites empty under one global deploy.
|
||||
|
||||
## Out of scope (follow-ups)
|
||||
|
||||
- **Per-cluster deploy** (deploy just SITE-A from the UI) — coordinator + UI work.
|
||||
- **Seeding demo drivers on the sites** — added via the central UI.
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-07-per-cluster-scoping.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: ResolveClusterScope + node-scoped ParseDriverInstances", "status": "pending"},
|
||||
{"id": 2, "subject": "Task 2: Node-scoped ParseComposition (address-space filter)", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: Wire driver-spawn + SubscribeBulk filtering into DriverHostActor", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 4, "subject": "Task 4: Wire scoped composition into OpcUaPublishActor.HandleRebuild", "status": "pending", "blockedBy": [2]},
|
||||
{"id": 5, "subject": "Task 5: Multi-cluster scoping E2E on the cluster harness", "status": "pending", "blockedBy": [3, 4]},
|
||||
{"id": 6, "subject": "Task 6: Rewrite docker-dev/docker-compose.yml for the single mesh", "status": "pending"},
|
||||
{"id": 7, "subject": "Task 7: Rewrite docker-dev/traefik-dynamic.yml (central-only route)", "status": "pending"},
|
||||
{"id": 8, "subject": "Task 8: Rewrite docker-dev/seed/seed-clusters.sql (MAIN nodes -> central-1/2)", "status": "pending"},
|
||||
{"id": 9, "subject": "Task 9: Live docker-dev verification", "status": "pending", "blockedBy": [3, 4, 5, 6, 7, 8]},
|
||||
{"id": 10, "subject": "Task 10: Update docker-dev docs + memory", "status": "pending", "blockedBy": [6]}
|
||||
],
|
||||
"lastUpdated": "2026-06-07"
|
||||
}
|
||||
@@ -3,6 +3,12 @@
|
||||
> **Status (2026-05-29): alarm-source leg VERIFIED. Historian-write leg still
|
||||
> pending the Windows sidecar + live AVEVA Historian.**
|
||||
>
|
||||
> **Re-confirmed 2026-05-31** against the same gateway (`http://10.100.0.48:5120`):
|
||||
> the Skip-gated live test passed again, pulling a native `Raise` transition
|
||||
> (`Galaxy!TestArea.TestMachine_001.TestAlarm001`, raw sev 500 → OPC UA 750/High,
|
||||
> category `TestArea`, operator comment `Test alarm #1`) through the production
|
||||
> consumer. Independent re-run, not the original capture.
|
||||
>
|
||||
> This is the D.1 deliverable called for by `docs/plans/alarms-worker-wiring-plan.md`
|
||||
> — captured evidence that a live Galaxy alarm reaches lmxopcua through the native
|
||||
> gateway path (not the sub-attribute fallback). It supersedes the "A.2 blocked"
|
||||
|
||||
+105
-82
@@ -4,12 +4,15 @@
|
||||
> Paths + project names moved: `OtOpcUa.Server/Security/` → `OtOpcUa.Security/`
|
||||
> (`Ldap/`, `Jwt/`, `Endpoints/AuthEndpoints.cs`), `OtOpcUa.Admin` is gone (its
|
||||
> auth + role-grant pages live in `OtOpcUa.AdminUI`), and Admin auth policies
|
||||
> register in `OtOpcUa.Host/Program.cs` via `AddOtOpcUaAuth` rather than in a
|
||||
> separate Admin process. The v2 `Security:Jwt` section adds JWT bearer auth
|
||||
> alongside the existing cookie scheme (`AddJwtBearer` wired via
|
||||
> `IPostConfigureOptions<JwtBearerOptions>` in `OtOpcUa.Security`). DataProtection
|
||||
> keys persist to the shared `ConfigDb.DataProtectionKeys` table so cookies
|
||||
> survive failover between admin-role nodes.
|
||||
> register from `OtOpcUa.Host/Program.cs` via `AddOtOpcUaAuth`
|
||||
> (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`) rather
|
||||
> than in a separate Admin process. The Admin UI uses a **single Cookie
|
||||
> authentication scheme** — there is no `AddJwtBearer` pipeline. The
|
||||
> `Security:Jwt` section configures `JwtTokenService`, which mints a JWT at the
|
||||
> `/auth/token` endpoint for **external** consumers (OPC UA clients / automation
|
||||
> scripts); the cookie itself stores the `ClaimsPrincipal` directly. DataProtection
|
||||
> keys persist to the shared Config DB (`PersistKeysToDbContext<OtOpcUaConfigDbContext>`)
|
||||
> so cookies survive failover between admin-role nodes.
|
||||
>
|
||||
> See `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §5 for the v2
|
||||
> auth + DataProtection rationale.
|
||||
@@ -18,8 +21,8 @@ OtOpcUa has four independent security concerns. This document covers all four:
|
||||
|
||||
1. **Transport security** — OPC UA secure channel (signing, encryption, X.509 trust).
|
||||
2. **OPC UA authentication** — Anonymous / UserName / X.509 session identities; UserName tokens authenticated by LDAP bind.
|
||||
3. **Data-plane authorization** — who can browse, read, subscribe, write, acknowledge alarms on which nodes. Evaluated by `PermissionTrie` against the Config DB `NodeAcl` tree.
|
||||
4. **Control-plane authorization** — who can view or edit fleet configuration in the Admin UI. Gated by the `AdminRole` (`ConfigViewer` / `ConfigEditor` / `FleetAdmin`) claim from `LdapGroupRoleMapping`.
|
||||
3. **Data-plane authorization** — who can browse, read, subscribe, write, acknowledge alarms on which nodes. Evaluated by `TriePermissionEvaluator` over a `PermissionTrie` built from the Config DB `NodeAcl` tree.
|
||||
4. **Control-plane authorization** — who can view or edit fleet configuration in the Admin UI. Gated by the `AdminRole` (`Viewer` / `Designer` / `Administrator`) claim resolved from `LdapGroupRoleMapping`.
|
||||
|
||||
Transport security and OPC UA authentication are per-node concerns configured in the Server's bootstrap `appsettings.json`. Data-plane ACLs and Admin role grants live in the Config DB.
|
||||
|
||||
@@ -33,42 +36,43 @@ The OtOpcUa Server supports configurable OPC UA transport security profiles that
|
||||
|
||||
There are two distinct layers of security in OPC UA:
|
||||
|
||||
- **Transport security** -- secures the communication channel itself using TLS-style certificate exchange, message signing, and encryption. This is what the `OpcUaServer:SecurityProfile` setting controls.
|
||||
- **Transport security** -- secures the communication channel itself using TLS-style certificate exchange, message signing, and encryption. This is what the `OpcUa:EnabledSecurityProfiles` setting controls.
|
||||
- **UserName token encryption** -- protects user credentials (username/password) sent during session activation. The OPC UA stack encrypts UserName tokens using the server's application certificate regardless of the transport security mode. UserName authentication therefore works on `None` endpoints too — the credentials themselves are always encrypted. A secure transport profile adds protection against message-level tampering and eavesdropping of data payloads.
|
||||
|
||||
### Supported security profiles
|
||||
|
||||
The server supports seven transport security profiles:
|
||||
The profiles are the members of the `OpcUaSecurityProfile` enum (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs`). The server ships **three** baseline profiles; the config value is the bare enum-member name (no hyphens, no underscores):
|
||||
|
||||
| Profile Name | Security Policy | Message Security Mode | Description |
|
||||
|-----------------------------------|----------------------------|-----------------------|--------------------------------------------------|
|
||||
| `None` | None | None | No signing or encryption. Suitable for development and isolated networks only. |
|
||||
| `Basic256Sha256-Sign` | Basic256Sha256 | Sign | Messages are signed but not encrypted. Protects against tampering but data is visible on the wire. |
|
||||
| `Basic256Sha256-SignAndEncrypt` | Basic256Sha256 | SignAndEncrypt | Messages are both signed and encrypted. Full protection against tampering and eavesdropping. |
|
||||
| `Aes128_Sha256_RsaOaep-Sign` | Aes128_Sha256_RsaOaep | Sign | Modern profile with AES-128 encryption and SHA-256 signing. |
|
||||
| `Aes128_Sha256_RsaOaep-SignAndEncrypt` | Aes128_Sha256_RsaOaep | SignAndEncrypt | Modern profile with AES-128 encryption. Recommended for production. |
|
||||
| `Aes256_Sha256_RsaPss-Sign` | Aes256_Sha256_RsaPss | Sign | Strongest profile with AES-256 and RSA-PSS signatures. |
|
||||
| `Aes256_Sha256_RsaPss-SignAndEncrypt` | Aes256_Sha256_RsaPss | SignAndEncrypt | Strongest profile. Recommended for high-security deployments. |
|
||||
| Enum member | Security Policy | Message Security Mode | Description |
|
||||
|---------------------------------|------------------|-----------------------|--------------------------------------------------|
|
||||
| `None` | None | None | No signing or encryption. Suitable for development and isolated networks only. |
|
||||
| `Basic256Sha256Sign` | Basic256Sha256 | Sign | Messages are signed but not encrypted. Protects against tampering but data is visible on the wire. |
|
||||
| `Basic256Sha256SignAndEncrypt` | Basic256Sha256 | SignAndEncrypt | Messages are both signed and encrypted. Full protection against tampering and eavesdropping. |
|
||||
|
||||
The server exposes a separate endpoint for each configured profile, and clients select the one they prefer during connection.
|
||||
`BuildSecurityPolicies` (`OpcUaApplicationHost.cs`) maps each configured profile to an SDK `ServerSecurityPolicy`. The server exposes a separate endpoint per configured profile and clients select the one they prefer at session open. The enum's XML doc notes that Aes128/Aes256 variants can be added later by extending the enum + `BuildSecurityPolicies` — the wiring is profile-agnostic — but they are **not implemented today**. There is no `SecurityProfileResolver` class.
|
||||
|
||||
> **Config value form.** The enum binds by member name, so a profile string with hyphens (e.g. `Basic256Sha256-Sign`) does **not** bind — use the exact enum-member spelling above. If `EnabledSecurityProfiles` is empty, the server falls back to a single `None` endpoint (logged, very visible) so it still has a listening endpoint.
|
||||
|
||||
### Configuration
|
||||
|
||||
Transport security is configured in the `OpcUaServer` section of the Server process's bootstrap `appsettings.json`:
|
||||
Transport security is configured in the `OpcUa` section of the Host process's bootstrap `appsettings.json` (bound to `OpcUaApplicationHostOptions`):
|
||||
|
||||
```json
|
||||
{
|
||||
"OpcUaServer": {
|
||||
"EndpointUrl": "opc.tcp://0.0.0.0:4840/OtOpcUa",
|
||||
"ApplicationName": "OtOpcUa Server",
|
||||
"OpcUa": {
|
||||
"ApplicationName": "OtOpcUa",
|
||||
"ApplicationUri": "urn:node-a:OtOpcUa",
|
||||
"PublicHostname": "0.0.0.0",
|
||||
"OpcUaPort": 4840,
|
||||
"PkiStoreRoot": "C:/ProgramData/OtOpcUa/pki",
|
||||
"AutoAcceptUntrustedClientCertificates": false,
|
||||
"SecurityProfile": "Basic256Sha256-SignAndEncrypt"
|
||||
"EnabledSecurityProfiles": [ "Basic256Sha256Sign", "Basic256Sha256SignAndEncrypt" ]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`EnabledSecurityProfiles` is a **list** — the server publishes one endpoint per entry. The default (when the key is omitted) is all three baseline profiles (`None`, `Basic256Sha256Sign`, `Basic256Sha256SignAndEncrypt`); production deployments typically drop `None`. The list must contain at least one entry (`OpcUaApplicationHostOptionsValidator` enforces `MinCount(…, 1)`).
|
||||
|
||||
The server certificate is auto-generated on first start if none exists in `PkiStoreRoot/own/`. Always generated even for `None`-only deployments because UserName token encryption depends on it.
|
||||
|
||||
### PKI directory layout
|
||||
@@ -91,13 +95,13 @@ When a client connects using a secure profile (`Sign` or `SignAndEncrypt`), the
|
||||
4. If not found and `AutoAcceptUntrustedClientCertificates` is `true`, the certificate is automatically copied to `trusted/` and the connection proceeds.
|
||||
5. If not found and `AutoAcceptUntrustedClientCertificates` is `false`, the certificate is copied to `rejected/` and the connection is refused.
|
||||
|
||||
The Admin UI `Certificates.razor` page uses `CertTrustService` (singleton reading `CertTrustOptions` for the Server's `PkiStoreRoot`) to promote rejected client certs to trusted without operators having to file-copy manually.
|
||||
The Admin UI `Certificates.razor` page (`src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Certificates.razor`) lists the contents of each PKI sub-store (own / trusted / issuer / rejected) by reading the `OpcUa:PkiStoreRoot` path from configuration. It is currently a **read-only viewer** — promoting a rejected cert to trusted is still a manual file operation (copy the `.der` from `rejected/` to `trusted/certs/`); the SDK trust list reloads on the next handshake.
|
||||
|
||||
### Production hardening
|
||||
|
||||
- Set `AutoAcceptUntrustedClientCertificates = false`.
|
||||
- Drop `None` from the profile set.
|
||||
- Use the Admin UI to promote trusted client certs rather than the auto-accept fallback.
|
||||
- Drop `None` from `EnabledSecurityProfiles`.
|
||||
- Promote trusted client certs by moving the `.der` from `rejected/` to `trusted/certs/` rather than relying on the auto-accept fallback. (The Admin UI Certificates page shows what is in each store.)
|
||||
- Periodically audit the `rejected/` directory; an unexpected entry is often a misconfigured client or a probe attempt.
|
||||
|
||||
---
|
||||
@@ -108,59 +112,55 @@ The Server accepts three OPC UA identity-token types:
|
||||
|
||||
| Token | Handler | Notes |
|
||||
|---|---|---|
|
||||
| Anonymous | `IUserAuthenticator.AuthenticateAsync(username: "", password: "")` | Refused in strict mode unless explicit anonymous grants exist; allowed in lax mode for backward compatibility. |
|
||||
| UserName/Password | `LdapOpcUaUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/LdapOpcUaUserAuthenticator.cs`, backed by `LdapAuthService` at `src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/LdapAuthService.cs`) | LDAP bind + group lookup; resolved `LdapGroups` flow into the session's identity bearer (`ILdapGroupsBearer`). |
|
||||
| X.509 Certificate | Stack-level acceptance + role mapping via CN | X.509 identity carries `AuthenticatedUser` + read roles; finer-grain authorization happens through the data-plane ACLs. |
|
||||
| Anonymous | No `IOpcUaUserAuthenticator` call — the SDK admits anonymous sessions at the channel. | Data-plane authorization (below) still default-denies any node a session has no ACL grant for. |
|
||||
| UserName/Password | `LdapOpcUaUserAuthenticator.AuthenticateUserNameAsync` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/LdapOpcUaUserAuthenticator.cs`, implements `IOpcUaUserAuthenticator`), backed by the app `ILdapAuthService` — `OtOpcUaLdapAuthService` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/OtOpcUaLdapAuthService.cs`). | LDAP bind + group lookup. The returned LDAP groups are mapped to roles via `IGroupRoleMapper<string>` (`OtOpcUaGroupRoleMapper`) and attached to the OPC UA session identity for the downstream ACL evaluator. |
|
||||
| X.509 Certificate | Stack-level acceptance during the secure-channel handshake. | The certificate must be trusted (see PKI trust flow); finer-grain authorization happens through the data-plane ACLs. |
|
||||
|
||||
### LDAP bind flow (`LdapUserAuthenticator`)
|
||||
When no authenticator is supplied, `OpcUaApplicationHost` falls back to `NullOpcUaUserAuthenticator`; the Host wires the real `LdapOpcUaUserAuthenticator` as a singleton in `Program.cs`.
|
||||
|
||||
`Program.cs` in the Server registers the authenticator based on `OpcUaServer:Ldap`:
|
||||
### LDAP bind flow (`OtOpcUaLdapAuthService`)
|
||||
|
||||
```csharp
|
||||
builder.Services.AddSingleton<IUserAuthenticator>(sp => ldapOptions.Enabled
|
||||
? new LdapUserAuthenticator(ldapOptions, sp.GetRequiredService<ILogger<LdapUserAuthenticator>>())
|
||||
: new DenyAllUserAuthenticator());
|
||||
```
|
||||
LDAP is configured under the `Security:Ldap` section (bound to `LdapOptions`, `src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/LdapOptions.cs`, `SectionName = "Security:Ldap"`). The app authenticator is `OtOpcUaLdapAuthService` — a thin wrapper around the shared `ZB.MOM.WW.Auth.Ldap` directory client that adds two app-only concerns the shared library deliberately does not model: the `Enabled` master switch and `DevStubMode`. The same `ILdapAuthService` instance serves **both** the Admin UI cookie login (`/auth/login`) and the OPC UA UserName path (via `LdapOpcUaUserAuthenticator`), so operators use one credential across both planes.
|
||||
|
||||
`LdapUserAuthenticator`:
|
||||
`OtOpcUaLdapAuthService.AuthenticateAsync`:
|
||||
|
||||
1. Refuses to bind over plain-LDAP unless `AllowInsecureLdap = true` (dev/test only).
|
||||
2. Connects to `Server:Port`, optionally upgrades to TLS (`UseTls = true`, port 636 for AD).
|
||||
3. Binds as the service account; searches `SearchBase` for `UserNameAttribute = username`.
|
||||
4. Rebinds as the resolved user DN with the supplied password (the actual credential check).
|
||||
5. Reads `GroupAttribute` (default `memberOf`) and strips the leading `CN=` so operators configure friendly group names in `GroupToRole`.
|
||||
6. Returns a `UserAuthResult` carrying the validated username + the set of LDAP groups. The set flows through to the session identity via `ILdapGroupsBearer.LdapGroups`.
|
||||
1. If `Enabled = false`, denies outright — no bind, no DevStub bypass (the master switch wins).
|
||||
2. If `DevStubMode = true`, accepts any non-empty credentials and grants the `Administrator` role **without any network bind** (dev only — must be `false` in production).
|
||||
3. Refuses to bind over a plaintext transport (`Transport = None`) unless `AllowInsecure = true` (dev/test only). This is enforced at login, not at startup.
|
||||
4. Delegates the real path to the shared `ZB.MOM.WW.Auth.Ldap` client: it binds (search-then-bind via `ServiceAccountDn`, or direct-bind `cn={user},{SearchBase}` when no service account is set), verifies the password, and reads the user's group memberships.
|
||||
5. Returns an `LdapAuthResult` carrying the validated username + the **groups** (never roles). Failure codes are folded into opaque user-facing error strings so a probe cannot distinguish "unknown user" from "wrong password".
|
||||
|
||||
Configuration example (Active Directory production):
|
||||
**Group → role mapping happens downstream**, not in the auth service: `LdapOpcUaUserAuthenticator` resolves `IGroupRoleMapper<string>` (`OtOpcUaGroupRoleMapper`) per call and unions its output with any pre-resolved roles (the DevStub `Administrator` grant). The roles are attached to the OPC UA session identity for the ACL evaluator. A mapper fault (e.g. a Config DB outage) falls back to the pre-resolved baseline rather than denying an otherwise-authenticated session.
|
||||
|
||||
`Transport` replaces the former `UseTls` bool: `Ldaps` (implicit TLS), `StartTls` (upgrade), or `None` (plaintext, requires `AllowInsecure`). Configuration example (Active Directory production):
|
||||
|
||||
```json
|
||||
{
|
||||
"OpcUaServer": {
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"Enabled": true,
|
||||
"DevStubMode": false,
|
||||
"Server": "dc01.corp.example.com",
|
||||
"Port": 636,
|
||||
"UseTls": true,
|
||||
"AllowInsecureLdap": false,
|
||||
"Transport": "Ldaps",
|
||||
"AllowInsecure": false,
|
||||
"SearchBase": "DC=corp,DC=example,DC=com",
|
||||
"ServiceAccountDn": "CN=OtOpcUaSvc,OU=Service Accounts,DC=corp,DC=example,DC=com",
|
||||
"ServiceAccountPassword": "<from your secret store>",
|
||||
"GroupAttribute": "memberOf",
|
||||
"DisplayNameAttribute": "cn",
|
||||
"UserNameAttribute": "sAMAccountName",
|
||||
"GroupToRole": {
|
||||
"OPCUA-Operators": "WriteOperate",
|
||||
"OPCUA-Engineers": "WriteConfigure",
|
||||
"OPCUA-Tuners": "WriteTune",
|
||||
"OPCUA-AlarmAck": "AlarmAck"
|
||||
"OPCUA-Designers": "Designer",
|
||||
"OPCUA-Admins": "Administrator",
|
||||
"OPCUA-Operators": "Operator"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`UserNameAttribute: "sAMAccountName"` is the critical AD override — the default `uid` is not populated on AD user entries. Use `userPrincipalName` instead if operators log in with `user@corp.example.com` form. Nested group membership is not expanded — assign users directly to the role-mapped groups, or pre-flatten in AD.
|
||||
|
||||
The same options bind the Admin's `LdapAuthService` (cookie auth / login form) so operators authenticate with a single credential across both processes.
|
||||
`GroupToRole` maps LDAP group names → Admin roles (case-insensitive); a user gets every role whose source group is in their membership. The values are the canonical control-plane role strings (`Viewer` / `Designer` / `Administrator`, plus the appsettings-only `Operator` for the `DriverOperator` policy). `UserNameAttribute: "sAMAccountName"` is the critical AD override — the GLAuth dev default is `cn`, which is not how AD users are looked up; use `userPrincipalName` instead if operators log in with `user@corp.example.com` form. `LdapOptionsValidator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/Configuration/LdapOptionsValidator.cs`) fails startup when `Transport = None` and `AllowInsecure = false` on a real-LDAP (non-DevStub) config.
|
||||
|
||||
---
|
||||
|
||||
@@ -172,20 +172,27 @@ Per decision #129 the model is **additive-only — no explicit Deny**. Grants at
|
||||
|
||||
### Hierarchy
|
||||
|
||||
ACLs are evaluated against the UNS path:
|
||||
ACLs are evaluated against the node's scope path. `NodeScope` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/NodeScope.cs`) carries a `Kind` that selects between two hierarchy shapes:
|
||||
|
||||
```
|
||||
ClusterId → Namespace → UnsArea → UnsLine → Equipment → Tag
|
||||
Equipment (UNS) kind: Cluster → Namespace → UnsArea → UnsLine → Equipment → Tag
|
||||
SystemPlatform (Galaxy) kind: Cluster → Namespace → FolderSegment(s) → Tag
|
||||
```
|
||||
|
||||
On the Galaxy/SystemPlatform path each folder segment takes one trie level, so a deeply-nested Galaxy folder reaches the same depth as a full UNS path. Unset mid-path levels leave the corresponding id `null` and the evaluator walks only as far as the scope goes.
|
||||
|
||||
Each level can carry `NodeAcl` rows (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/NodeAcl.cs`) that grant a permission bundle to a set of `LdapGroups`.
|
||||
|
||||
### Permission flags
|
||||
|
||||
`NodePermissions` (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/NodePermissions.cs`), stored as an `int` bitmask in `NodeAcl.PermissionFlags`:
|
||||
|
||||
```csharp
|
||||
[Flags]
|
||||
public enum NodePermissions : uint
|
||||
public enum NodePermissions : int
|
||||
{
|
||||
None = 0,
|
||||
|
||||
Browse = 1 << 0,
|
||||
Read = 1 << 1,
|
||||
Subscribe = 1 << 2,
|
||||
@@ -215,20 +222,20 @@ The three Write tiers map to Galaxy's v1 `SecurityClassification` — `FreeAcces
|
||||
| Class | Role |
|
||||
|---|---|
|
||||
| `PermissionTrie` | Cluster-scoped trie; each node carries `(GroupId → NodePermissions)` grants. |
|
||||
| `PermissionTrieBuilder` | Builds a trie from the current `NodeAcl` rows in one pass. |
|
||||
| `PermissionTrieCache` | Per-cluster memoised trie; invalidated via `AclChangeNotifier` when the Admin publishes a draft that touches ACLs. |
|
||||
| `TriePermissionEvaluator` | Implements `IPermissionEvaluator.Authorize(session, operation, scope)` — walks from the root to the leaf for the supplied `NodeScope`, unions grants along the path, compares required permission to the union. |
|
||||
| `PermissionTrieBuilder` | Builds a trie from the current `NodeAcl` rows in one pass and installs it into the cache. |
|
||||
| `PermissionTrieCache` | Process-singleton cache keyed on `(ClusterId, GenerationId)`. Generation-sealed: `Install(trie)` adds a new generation + advances the "current" pointer; older generations are retained (in-flight requests still resolve) and GC'd by `Prune`. `Invalidate(clusterId)` drops every cached trie for a cluster. There is **no** `AclChangeNotifier` — a publish installs a new generation rather than signalling an invalidation. |
|
||||
| `TriePermissionEvaluator` | Implements `IPermissionEvaluator.Authorize(session, operation, scope)`. Walks the cluster trie for the supplied `NodeScope`, unions grants along the path, and returns an `AuthorizationDecision`. Evaluates against the **session's bound generation** (`session.AuthGenerationId`), not just "current", so a grant added/removed in a newer generation cannot take effect mid-session. |
|
||||
|
||||
`NodeScope` carries `(ClusterId, NamespaceId, AreaId, LineId, EquipmentId, TagId)`; any suffix may be null — a tag-level ACL is more specific than an area-level ACL but both contribute via union.
|
||||
`NodeScope` is described above (Equipment-kind vs SystemPlatform-kind). The evaluator unions the matched grants along the path — a tag-level ACL and an area-level ACL both contribute.
|
||||
|
||||
### Dispatch gate — `IPermissionEvaluator`
|
||||
|
||||
`IPermissionEvaluator.Authorize(session, operation, scope)` (default impl `TriePermissionEvaluator` at `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs`) bridges the OPC UA stack's `ISystemContext.UserIdentity` to the trie. The dispatch path calls it on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call. A non-allow decision short-circuits the dispatch with `BadUserAccessDenied`.
|
||||
`IPermissionEvaluator.Authorize(UserAuthorizationState session, OpcUaOperation operation, NodeScope scope)` (default impl `TriePermissionEvaluator` at `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs`) returns an `AuthorizationDecision`. The dispatch path calls it on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call; a `NotGranted` decision denies the operation.
|
||||
|
||||
Key properties:
|
||||
|
||||
- **Driver-agnostic.** No driver-level code participates in authorization decisions. Drivers report `SecurityClassification` as metadata on tag discovery; everything else flows through the evaluator.
|
||||
- **Fail-open-during-transition.** `StrictMode = false` (default during ACL rollouts) lets sessions without resolved LDAP groups proceed; flip `Authorization:StrictMode = true` in production once ACLs are populated.
|
||||
- **Strictly fail-closed (default-deny).** Every guard path returns `NotGranted` — a stale session (past the staleness ceiling, decision #152), a cluster mismatch between session and scope, a missing trie, a pruned bound generation, or simply no matching grant. There is no `StrictMode` / fail-open mode; absence of a grant is always a deny.
|
||||
- **Evaluator stays pure.** `TriePermissionEvaluator` has no OPC UA stack dependency — it's tested directly from xUnit.
|
||||
|
||||
### Full model
|
||||
@@ -241,24 +248,40 @@ See [`docs/v2/acl-design.md`](v2/acl-design.md) for the complete design: trie in
|
||||
|
||||
Control-plane authorization governs **the Admin UI** — who can view fleet config, edit drafts, publish generations, manage cluster nodes + credentials.
|
||||
|
||||
Per decision #150 control-plane roles are **deliberately independent of data-plane ACLs**. An operator who can read every OPC UA tag in production may not be allowed to edit cluster config; conversely a ConfigEditor may not have any data-plane grants at all.
|
||||
Per decision #150 control-plane roles are **deliberately independent of data-plane ACLs**. An operator who can read every OPC UA tag in production may not be allowed to edit cluster config; conversely a `Designer` may not have any data-plane grants at all.
|
||||
|
||||
### Roles
|
||||
|
||||
The `AdminRole` enum (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/AdminRole.cs`) defines:
|
||||
The `AdminRole` enum (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/AdminRole.cs`) defines three roles. Task 1.7 standardized the member names on the canonical `ZB.MOM.WW.Auth` `CanonicalRole` vocabulary (`ConfigViewer → Viewer`, `ConfigEditor → Designer`, `FleetAdmin → Administrator`); a data migration (`CanonicalizeAdminRoles`) rewrote existing rows. This was a rename, not a permission change.
|
||||
|
||||
| Role | Capabilities |
|
||||
|---|---|
|
||||
| `ConfigViewer` | Read-only access to drafts, generations, audit log, fleet status. |
|
||||
| `ConfigEditor` | ConfigViewer plus draft editing (UNS, equipment, tags, ACLs, driver instances, reservations, CSV imports). Cannot publish. |
|
||||
| `FleetAdmin` | ConfigEditor plus publish, cluster/node CRUD, credential management, role-grant management. Also satisfies the `DriverOperator` authorization policy. |
|
||||
| `DriverOperator` | May issue **Reconnect** and **Restart** commands against live driver instances from the Admin UI `DriverStatusPanel`. Gated by the `DriverOperator` named policy in `AddAuthorization` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`). Map an LDAP group via `GroupToRole`, e.g. `"ot-driver-operator": "DriverOperator"`. |
|
||||
| `Viewer` | Read-only access to drafts, generations, audit log, fleet status. (Was `ConfigViewer`.) |
|
||||
| `Designer` | Viewer plus draft authoring (UNS, equipment, tags, ACLs, driver instances, reservations, CSV imports). Cannot publish. (Was `ConfigEditor`.) |
|
||||
| `Administrator` | Designer plus publish, cluster/node CRUD, credential management, role-grant management. Satisfies both the `FleetAdmin` and `DriverOperator` authorization policies. (Was `FleetAdmin`.) |
|
||||
|
||||
In v2 the authentication + authorization stack is wired centrally by `AddOtOpcUaAuth` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`) and Razor pages gate inline with the role names, e.g. `@attribute [Authorize(Roles = "FleetAdmin,ConfigEditor")]` on `Deployments.razor`. Nav-menu sections hide via `<AuthorizeView>`.
|
||||
`DriverOperator` is an **authorization policy name** (kept stable), not an `AdminRole` member. It gates **Reconnect** / **Restart** commands against live driver instances from the Admin UI `DriverStatusPanel` and requires the canonical role `Operator` or `Administrator` (`policy.RequireRole("Operator", "Administrator")` in `AddAuthorization`, `src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`). `Operator` is an appsettings-only string role (not an `AdminRole` member); map an LDAP group to it via `GroupToRole`, e.g. `"ot-driver-operator": "Operator"`. The `FleetAdmin` policy requires the `Administrator` role.
|
||||
|
||||
In v2 the authentication + authorization stack is wired centrally by `AddOtOpcUaAuth` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`), which also installs a `FallbackPolicy` that requires an authenticated user. Razor pages gate inline with the canonical role names, e.g. `@attribute [Authorize(Roles = "Administrator,Designer")]`. Nav-menu sections hide via `<AuthorizeView>`.
|
||||
|
||||
### Role grant source
|
||||
|
||||
Admin reads `LdapGroupRoleMapping` rows from the Config DB (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/LdapGroupRoleMapping.cs`) — the same pattern as the data-plane `NodeAcl` but scoped to Admin roles + (optionally) cluster scope for multi-site fleets. The `RoleGrants.razor` page lets FleetAdmins edit these mappings without leaving the UI.
|
||||
Admin reads `LdapGroupRoleMapping` rows from the Config DB (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/LdapGroupRoleMapping.cs`) — the same pattern as the data-plane `NodeAcl` but scoped to Admin roles + (optionally) one cluster for multi-site fleets (a system-wide row, `IsSystemWide = true`, stacks additively with cluster-scoped rows). The `RoleGrants.razor` page lets `Administrator`s edit these mappings without leaving the UI.
|
||||
|
||||
### Headless deploy API (`POST /api/deployments`)
|
||||
|
||||
For CI / scripts that need to trigger a deployment without driving the Blazor "Deploy current configuration" button, admin-role nodes expose `POST /api/deployments` (`DeployApiEndpoints`, `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Api/DeployApiEndpoints.cs`). It forwards to the same `IAdminOperationsClient.StartDeploymentAsync` the button calls.
|
||||
|
||||
Auth is a **single configured secret** read from the `X-Api-Key` header and compared in constant time — deliberately orthogonal to the cookie-only web auth (`OPC UA Authentication` above) so automation needs no LDAP login round-trip. The endpoint is `AllowAnonymous` so the `FallbackPolicy` doesn't 401 it, and enforces the key itself. **It self-disables (503) until `Security:DeployApiKey` is set**, so it is never open by default.
|
||||
|
||||
```bash
|
||||
curl -X POST https://<admin-host>/api/deployments \
|
||||
-H 'X-Api-Key: <Security:DeployApiKey>' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"createdBy":"ci-bot"}'
|
||||
```
|
||||
|
||||
Responses: `202 Accepted` (`{ outcome, deploymentId, revisionHash }`) when a deployment was sealed, `200` for `NoChanges`, `409` when another deployment is in flight, `422` when rejected, `401` for a missing/wrong key, `503` when unconfigured. Set the secret via `Security:DeployApiKey` (env `Security__DeployApiKey`) on admin nodes only; treat it like any deploy credential (rotate, keep out of source).
|
||||
|
||||
---
|
||||
|
||||
@@ -266,9 +289,9 @@ Admin reads `LdapGroupRoleMapping` rows from the Config DB (`src/Core/ZB.MOM.WW.
|
||||
|
||||
Per-capability resilience (retry, timeout, circuit-breaker, bulkhead) is applied by `CapabilityInvoker` in `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/`. A driver-capability call made **outside** the invoker bypasses resilience entirely — which in production looks like inconsistent timeouts, un-wrapped retries, and unbounded blocking.
|
||||
|
||||
`OTOPCUA0001` (Roslyn analyzer at `src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs`) fires as a compile-time **warning** when an `async`/`Task`-returning method on one of the seven guarded capability interfaces (`IReadable`, `IWritable`, `ITagDiscovery`, `ISubscribable`, `IHostConnectivityProbe`, `IAlarmSource`, `IHistoryProvider`) is invoked **outside** a lambda passed to `CapabilityInvoker.ExecuteAsync` / `ExecuteWriteAsync` / `AlarmSurfaceInvoker.*`. The analyzer walks up the syntax tree from the call site, finds any enclosing invoker invocation, and verifies the call lives transitively inside that invocation's anonymous-function argument — a sibling pattern (do the call, then invoke `ExecuteAsync` on something unrelated nearby) does not satisfy the rule.
|
||||
`OTOPCUA0001` (Roslyn analyzer at `src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs`) fires with category `OtOpcUa.Resilience` and default severity **Warning** (per `AnalyzerReleases.Shipped.md`) when a method on one of the seven guarded capability interfaces (`IReadable`, `IWritable`, `ITagDiscovery`, `ISubscribable`, `IHostConnectivityProbe`, `IAlarmSource`, `IHistoryProvider` — all in `ZB.MOM.WW.OtOpcUa.Core.Abstractions`) is invoked **outside** a lambda passed to `CapabilityInvoker.ExecuteAsync` / `ExecuteWriteAsync`. `AlarmSurfaceInvoker` is **not** a wrapper home — its own implementation is covered transitively because it routes through the inner `CapabilityInvoker.ExecuteAsync`. The analyzer walks up the syntax tree from the call site, finds any enclosing invoker invocation, and verifies the call lives transitively inside that invocation's anonymous-function argument — a sibling pattern (do the call, then invoke `ExecuteAsync` on something unrelated nearby) does not satisfy the rule.
|
||||
|
||||
Five xUnit-v3 + Shouldly tests at `tests/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers.Tests` cover the common fail/pass shapes + the sibling-pattern regression guard.
|
||||
The xunit.v3 + Shouldly suite at `tests/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers.Tests/UnwrappedCapabilityCallAnalyzerTests.cs` covers the common fail/pass shapes + the sibling-pattern regression guard.
|
||||
|
||||
The rule is intentionally scoped to async surfaces — pure in-memory accessors like `IHostConnectivityProbe.GetHostStatuses()` return synchronously and do not require the invoker wrap.
|
||||
|
||||
@@ -276,8 +299,8 @@ The rule is intentionally scoped to async surfaces — pure in-memory accessors
|
||||
|
||||
## Audit Logging
|
||||
|
||||
- **Server**: Serilog `AUDIT:` prefix on every authentication success/failure, certificate validation result, write access denial. Written alongside the regular rolling file sink.
|
||||
- **Admin**: `AuditLogService` writes `ConfigAuditLog` rows to the Config DB for every publish, rollback, cluster-node CRUD, credential rotation. Visible in the Audit page for operators with `ConfigViewer` or above.
|
||||
- **Server**: authentication, certificate-validation, and write-denial events are logged through the regular Serilog rolling file sink.
|
||||
- **Admin**: `AuditWriterActor` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Audit/AuditWriterActor.cs`) writes `ConfigAuditLog` rows (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/ConfigAuditLog.cs`) to the Config DB for publish, rollback, cluster-node CRUD, and credential rotation. Visible on the cluster Audit page (`ClusterAudit.razor`) for operators with `Viewer` or above.
|
||||
|
||||
---
|
||||
|
||||
@@ -285,16 +308,16 @@ The rule is intentionally scoped to async surfaces — pure in-memory accessors
|
||||
|
||||
### Certificate trust failure
|
||||
|
||||
Check `{PkiStoreRoot}/rejected/` for the client's cert. Promote via Admin UI Certificates page, or copy the `.der` file manually to `trusted/`.
|
||||
Check `{PkiStoreRoot}/rejected/` for the client's cert. Copy the `.der` file to `trusted/certs/`; the SDK trust list reloads on the next handshake. The Admin UI Certificates page shows what is in each store but does not move certs.
|
||||
|
||||
### LDAP users can connect but fail authorization
|
||||
|
||||
Verify (a) `OpcUaServer:Ldap:GroupAttribute` returns groups in the form `CN=MyGroup,…` (OtOpcUa strips the `CN=` for matching), (b) a `NodeAcl` grant exists at any level of the node's UNS path that unions to the required permission, (c) `Authorization:StrictMode` is correctly set for the deployment stage.
|
||||
Verify (a) `Security:Ldap:GroupAttribute` (default `memberOf`) returns the user's groups, (b) `Security:Ldap:GroupToRole` maps those groups to the expected roles, and (c) a `NodeAcl` grant exists at some level of the node's scope path that unions to the required permission. The data-plane evaluator is strictly default-deny — there is no fail-open mode to fall back on.
|
||||
|
||||
### LDAP bind rejected as "insecure"
|
||||
|
||||
Set `UseTls = true` + `Port = 636`, or temporarily flip `AllowInsecureLdap = true` in dev. Production Active Directory increasingly refuses plain-LDAP bind under LDAP-signing enforcement.
|
||||
Set `Security:Ldap:Transport = "Ldaps"` (or `"StartTls"`) with the matching port (636 for AD `Ldaps`), or temporarily set `Security:Ldap:AllowInsecure = true` in dev. Production Active Directory increasingly refuses plain-LDAP bind under LDAP-signing enforcement.
|
||||
|
||||
### `AuthorizationGate` denies every call after a publish
|
||||
### Stale ACL trie after a publish
|
||||
|
||||
`AclChangeNotifier` invalidates the `PermissionTrieCache` on publish; a stuck cache is usually a missed notification. Restart the Server as a quick mitigation and file a bug — the design is to stay fresh without restarts.
|
||||
A publish installs a **new generation** into `PermissionTrieCache` via `PermissionTrieBuilder` rather than signalling an invalidation; the evaluator binds each session to a generation. If grants appear stale, confirm the new generation was installed (publish completed) and that sessions re-resolved their auth state — a session past its staleness ceiling fails closed and must re-authenticate. As a last resort `PermissionTrieCache.Invalidate(clusterId)` drops a cluster's cached tries.
|
||||
|
||||
@@ -65,7 +65,7 @@ Running record of v2 dev services on the Windows dev VM. Updated on every instal
|
||||
|---------|---------------------|---------|-----------|------------------------|---------------|--------|
|
||||
| **Central config DB** | Docker container `otopcua-mssql` on the Linux Docker host (image `mcr.microsoft.com/mssql/server:2022-latest`) | 16.0.4250.1 (RTM-CU24-GDR, KB5083252) | `10.100.0.35:14330` → `1433` (container) — port 14330 retained from the previous local-container setup so connection-string ports don't churn | User `sa` / Password `OtOpcUaDev_2026!` | Docker named volume `otopcua-mssql-data` on the Docker host | ✅ Running on Docker host (`/opt/otopcua-mssql/`) since 2026-04-28; carries `project=lmxopcua` label |
|
||||
| Dev Galaxy (AVEVA System Platform) | Local install on this dev box — full ArchestrA + Historian + OI-Server stack | v1 baseline | Local COM via MXAccess (`C:\Program Files (x86)\ArchestrA\Framework\bin\ArchestrA.MXAccess.dll`); Historian via `aaH*` services; SuiteLink via `slssvc` | Windows Auth | Galaxy repository DB `ZB` on local SQL Server (separate instance from `otopcua-mssql` — legacy v1 Galaxy DB, not related to v2 config DB) | ✅ **Fully available — Phase 2 lift unblocked.** 27 ArchestrA / AVEVA / Wonderware services running incl. `aaBootstrap`, `aaGR` (Galaxy Repository), `aaLogger`, `aaUserValidator`, `aaPim`, `ArchestrADataStore`, `AsbServiceManager`, `AutoBuild_Service`; full Historian set (`aahClientAccessPoint`, `aahGateway`, `aahInSight`, `aahSearchIndexer`, `aahSupervisor`, `InSQLStorage`, `InSQLConfiguration`, `InSQLEventSystem`, `InSQLIndexing`, `InSQLIOServer`, `InSQLManualStorage`, `InSQLSystemDriver`, `HistorianSearch-x64`); `slssvc` (Wonderware SuiteLink); `OI-Gateway` install present at `C:\Program Files (x86)\Wonderware\OI-Server\OI-Gateway\` (decision #142 AppServer-via-OI-Gateway smoke test now also unblocked) |
|
||||
| GLAuth (LDAP) | Local install at `C:\publish\glauth\` | v2.4.0 | `localhost:3893` (LDAP) / `3894` (LDAPS, disabled) | Direct-bind `cn={user},dc=lmxopcua,dc=local` per `auth.md`; users `readonly`/`writeop`/`writetune`/`writeconfig`/`alarmack`/`admin`/`serviceaccount` (passwords in `glauth.cfg` as SHA-256) | `C:\publish\glauth\` | ✅ Running (NSSM service `GLAuth`). Phase 1 Admin uses GroupToRole map `ReadOnly→ConfigViewer`, `WriteOperate→ConfigEditor`, `AlarmAck→FleetAdmin`. v2-rebrand to `dc=otopcua,dc=local` is a future cosmetic change |
|
||||
| GLAuth (LDAP) | Docker container `zb-shared-glauth` on the Linux Docker host — managed via `scadaproj/infra/glauth/` | v2.4.0 | `10.100.0.35:3893` (LDAP plaintext; LDAPS disabled) | Bind account `cn=serviceaccount,dc=zb,dc=local` / `serviceaccount123`; all test users password `password`; baseDN `dc=zb,dc=local` | `scadaproj/infra/glauth/` (source of truth + deploy/verify runbook) | ✅ Running on Docker host. Shared across OtOpcUa, MxAccessGateway, ScadaBridge. OtOpcUa groups: `OtOpcUa-Admins`→Administrator, `OtOpcUa-Designers`→Designer, `OtOpcUa-Viewers`→Viewer. The per-VM NSSM service at `C:\publish\glauth\` and old base DNs `dc=lmxopcua,dc=local` / `dc=otopcua,dc=local` are obsolete. |
|
||||
| OPC Foundation reference server | Not yet built | — | `10.100.0.35:62541` (target) | `user1` / `password1` (reference-server defaults) | — | Pending (needed for Phase 5 OPC UA Client driver testing) |
|
||||
| FOCAS TCP stub | Not yet built | — | `10.100.0.35:8193` (target) | n/a | — | Pending (built in Phase 5; runs on Docker host) |
|
||||
| Modbus simulator (`otopcua-pymodbus:3.13.0`) | Docker compose at `/opt/otopcua-modbus/` on Docker host | pinned 3.13.0 | `10.100.0.35:5020` | n/a | n/a | Stack staged; bring up with `lmxopcua-fix up modbus <profile>` from this VM |
|
||||
@@ -85,17 +85,19 @@ Copy-paste-ready. The checked-in `appsettings.json` defaults already point at th
|
||||
},
|
||||
"Authentication": {
|
||||
"Ldap": {
|
||||
"Host": "localhost",
|
||||
"Host": "10.100.0.35",
|
||||
"Port": 3893,
|
||||
"UseLdaps": false,
|
||||
"BindDn": "cn=admin,dc=otopcua,dc=local",
|
||||
"BindPassword": "<see glauth-otopcua.cfg — pending seeding>"
|
||||
"Transport": "None",
|
||||
"AllowInsecure": true,
|
||||
"SearchBase": "dc=zb,dc=local",
|
||||
"ServiceAccountDn": "cn=serviceaccount,dc=zb,dc=local",
|
||||
"ServiceAccountPassword": "serviceaccount123"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
LDAP host stays `localhost` because GLAuth still runs as a native NSSM service on this dev VM (not yet migrated to the Docker host).
|
||||
LDAP now points at the shared GLAuth on the Linux Docker host (`10.100.0.35:3893`, baseDN `dc=zb,dc=local`). The per-VM NSSM service at `C:\publish\glauth\` is obsolete. See `scadaproj/infra/glauth/` for the source of truth and deploy runbook.
|
||||
|
||||
For xUnit test fixtures that need a throwaway DB per test run, build connection strings with `Database=OtOpcUaConfig_Test_{timestamp}` to avoid cross-run pollution.
|
||||
|
||||
@@ -139,7 +141,7 @@ Dev credentials in this inventory are convenience defaults, not secrets. Change
|
||||
| Resource | Purpose | Type | Default port | Default credentials | Owner |
|
||||
|----------|---------|------|--------------|---------------------|-------|
|
||||
| **SQL Server 2022 dev edition** | Central config DB; integration tests against `Configuration` project | Local install OR Docker container `mcr.microsoft.com/mssql/server:2022-latest` | 1433 default, or 14330 when a native MSSQL instance (e.g. the Galaxy `ZB` host) already occupies 1433 | `sa` / `OtOpcUaDev_2026!` (dev only — production uses Integrated Security or gMSA per decision #46) | Developer (per machine) |
|
||||
| **GLAuth (LDAP server)** | Admin UI authentication tests; data-path ACL evaluation tests | Local binary at `C:\publish\glauth\` per existing CLAUDE.md | 3893 (LDAP) / 3894 (LDAPS) | Service principal: `cn=admin,dc=otopcua,dc=local` / `OtOpcUaDev_2026!`; test users defined in GLAuth config | Developer (per machine) |
|
||||
| **GLAuth (LDAP server)** | Admin UI authentication tests; data-path ACL evaluation tests | Shared Docker container `zb-shared-glauth` on the Linux Docker host at `10.100.0.35:3893` — managed via `scadaproj/infra/glauth/`; no per-developer install required | 3893 (LDAP plaintext) | Bind `cn=serviceaccount,dc=zb,dc=local` / `serviceaccount123`; baseDN `dc=zb,dc=local`; test users password `password` | Shared (Docker host — `scadaproj/infra/glauth/`) |
|
||||
| **Local dev Galaxy** (Aveva System Platform) | Galaxy driver tests; v1 IntegrationTests parity | Existing on dev box per CLAUDE.md | n/a (local COM) | Windows Auth | Developer (already present per project setup) |
|
||||
|
||||
### C. Integration host (one dedicated Windows machine the team shares)
|
||||
|
||||
@@ -104,8 +104,8 @@ Anonymous OPC UA sessions are denied writes against `Operate`-classified tags by
|
||||
"Enabled": true,
|
||||
"Server": "localhost",
|
||||
"Port": 3893,
|
||||
"SearchBase": "dc=lmxopcua,dc=local",
|
||||
"ServiceAccountDn": "cn=serviceaccount,dc=lmxopcua,dc=local",
|
||||
"SearchBase": "dc=zb,dc=local",
|
||||
"ServiceAccountDn": "cn=serviceaccount,dc=zb,dc=local",
|
||||
"ServiceAccountPassword": "serviceaccount123",
|
||||
"GroupToRole": {
|
||||
"ReadOnly": "ReadOnly",
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Cluster-broadcast audit event consumed by the <c>AuditWriterActor</c> singleton, which
|
||||
/// batches and idempotently inserts into <c>ConfigAuditLog</c>.
|
||||
/// </summary>
|
||||
public sealed record AuditEvent(
|
||||
Guid EventId,
|
||||
string Category,
|
||||
string Action,
|
||||
string Actor,
|
||||
DateTime OccurredAtUtc,
|
||||
string? DetailsJson,
|
||||
NodeId SourceNode,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Akka"/>
|
||||
<PackageReference Include="ZB.MOM.WW.Audit"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -41,4 +41,10 @@ public sealed class ConfigAuditLog
|
||||
/// <summary>Correlation ID from <c>AuditEvent.CorrelationId</c> so an audit row joins to its
|
||||
/// originating request/workflow. Nullable for the same backfill reason as <see cref="EventId"/>.</summary>
|
||||
public Guid? CorrelationId { get; set; }
|
||||
|
||||
/// <summary>Normalized outcome from <c>AuditEvent.Outcome</c> (the canonical
|
||||
/// <c>ZB.MOM.WW.Audit.AuditOutcome</c>: <c>Success</c> | <c>Failure</c> | <c>Denied</c>),
|
||||
/// stored as its enum member name. Nullable so pre-Outcome rows backfill cleanly and the
|
||||
/// bespoke stored-procedure audit path (which does not derive an outcome) writes NULL.</summary>
|
||||
public string? Outcome { get; set; }
|
||||
}
|
||||
|
||||
@@ -7,20 +7,31 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
/// <see cref="Entities.NodeAcl"/> joined against LDAP group memberships directly.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decision #150 the two concerns share zero runtime code path:
|
||||
/// the control plane (Admin UI) consumes <see cref="Entities.LdapGroupRoleMapping"/>; the
|
||||
/// data plane consumes <see cref="Entities.NodeAcl"/> rows directly. Having them in one
|
||||
/// table would collapse the distinction + let a user inherit tag permissions via their
|
||||
/// admin-role claim path.
|
||||
/// <para>
|
||||
/// Per <c>docs/v2/plan.md</c> decision #150 the two concerns share zero runtime code path:
|
||||
/// the control plane (Admin UI) consumes <see cref="Entities.LdapGroupRoleMapping"/>; the
|
||||
/// data plane consumes <see cref="Entities.NodeAcl"/> rows directly. Having them in one
|
||||
/// table would collapse the distinction + let a user inherit tag permissions via their
|
||||
/// admin-role claim path.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Task 1.7 standardized the member names on the canonical control-plane role vocabulary
|
||||
/// (<c>ZB.MOM.WW.Auth</c> <c>CanonicalRole</c>): <c>ConfigViewer → Viewer</c>,
|
||||
/// <c>ConfigEditor → Designer</c>, <c>FleetAdmin → Administrator</c>. The appsettings-only
|
||||
/// <c>DriverOperator</c> string role likewise became <c>Operator</c>. These members persist
|
||||
/// as their string names (EF <c>HasConversion&lt;string&gt;</c>); the rename is paired with
|
||||
/// a data migration (<c>CanonicalizeAdminRoles</c>) that rewrites existing rows. This is a
|
||||
/// rename, not a permission change — enforcement semantics are preserved.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public enum AdminRole
|
||||
{
|
||||
/// <summary>Read-only Admin UI access — can view cluster state, drafts, publish history.</summary>
|
||||
ConfigViewer,
|
||||
/// <summary>Read-only Admin UI access — can view cluster state, drafts, publish history. (Canonical: Viewer; was ConfigViewer.)</summary>
|
||||
Viewer,
|
||||
|
||||
/// <summary>Can author drafts + submit for publish.</summary>
|
||||
ConfigEditor,
|
||||
/// <summary>Can author drafts + submit for publish. (Canonical: Designer; was ConfigEditor.)</summary>
|
||||
Designer,
|
||||
|
||||
/// <summary>Full Admin UI privileges including publish + fleet-admin actions.</summary>
|
||||
FleetAdmin,
|
||||
/// <summary>Full Admin UI privileges including publish + fleet-admin actions. (Canonical: Administrator; was FleetAdmin.)</summary>
|
||||
Administrator,
|
||||
}
|
||||
|
||||
+1755
File diff suppressed because it is too large
Load Diff
+39
@@ -0,0 +1,39 @@
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
{
    /// <summary>
    /// Task 1.7 data migration. <c>LdapGroupRoleMapping.Role</c> holds the <c>AdminRole</c> enum
    /// member name as text (EF <c>HasConversion&lt;string&gt;</c>, <c>nvarchar(32)</c>), so the
    /// enum renames (<c>ConfigViewer → Viewer</c>, <c>ConfigEditor → Designer</c>,
    /// <c>FleetAdmin → Administrator</c>) require the already-stored values to be rewritten to
    /// keep the C# enum and the persisted strings aligned.
    /// </summary>
    /// <remarks>
    /// Only <c>UPDATE</c> statements are issued; no column, length or index operation is part of
    /// this migration. The canonical names ("Viewer" = 6, "Designer" = 8, "Administrator" = 13
    /// characters) all fit the existing <c>nvarchar(32)</c> column. It is a rename only — the
    /// meaning of each role is untouched.
    /// </remarks>
    public partial class CanonicalizeAdminRoles : Migration
    {
        /// <summary>Legacy → canonical rewrites, in the order <see cref="Up"/> issues them.</summary>
        private static readonly string[] CanonicalizeStatements =
        {
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'Viewer' WHERE [Role] = N'ConfigViewer';",
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'Designer' WHERE [Role] = N'ConfigEditor';",
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'Administrator' WHERE [Role] = N'FleetAdmin';",
        };

        /// <summary>Canonical → legacy rewrites, in the order <see cref="Down"/> issues them.</summary>
        private static readonly string[] RevertStatements =
        {
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'FleetAdmin' WHERE [Role] = N'Administrator';",
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'ConfigEditor' WHERE [Role] = N'Designer';",
            "UPDATE [LdapGroupRoleMapping] SET [Role] = N'ConfigViewer' WHERE [Role] = N'Viewer';",
        };

        /// <inheritdoc />
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            foreach (var statement in CanonicalizeStatements)
            {
                migrationBuilder.Sql(statement);
            }
        }

        /// <inheritdoc />
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            foreach (var statement in RevertStatements)
            {
                migrationBuilder.Sql(statement);
            }
        }
    }
}
|
||||
+1759
File diff suppressed because it is too large
Load Diff
+35
@@ -0,0 +1,35 @@
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
{
    /// <summary>
    /// Task 2.2 — introduces the nullable <c>Outcome</c> column on <c>ConfigAuditLog</c>. It holds
    /// the canonical <c>ZB.MOM.WW.Audit.AuditOutcome</c> as its enum member name
    /// (<c>nvarchar(16)</c>, the same string-storage approach used for <c>AdminRole</c>). The
    /// change is additive: the column is nullable and nothing is backfilled, so rows written
    /// before this migration — and rows from the bespoke stored-procedure audit path, which does
    /// not derive an outcome — simply carry NULL.
    /// </summary>
    public partial class AddConfigAuditLogOutcome : Migration
    {
        private const string AuditTable = "ConfigAuditLog";
        private const string OutcomeColumn = "Outcome";

        /// <inheritdoc />
        protected override void Up(MigrationBuilder migrationBuilder)
            => migrationBuilder.AddColumn<string>(
                name: OutcomeColumn,
                table: AuditTable,
                type: "nvarchar(16)",
                maxLength: 16,
                nullable: true);

        /// <inheritdoc />
        protected override void Down(MigrationBuilder migrationBuilder)
            => migrationBuilder.DropColumn(
                name: OutcomeColumn,
                table: AuditTable);
    }
}
|
||||
+4
@@ -186,6 +186,10 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("Outcome")
|
||||
.HasMaxLength(16)
|
||||
.HasColumnType("nvarchar(16)");
|
||||
|
||||
b.Property<string>("Principal")
|
||||
.IsRequired()
|
||||
.HasMaxLength(128)
|
||||
|
||||
@@ -445,6 +445,9 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
e.Property(x => x.DetailsJson).HasColumnType("nvarchar(max)");
|
||||
e.Property(x => x.EventId);
|
||||
e.Property(x => x.CorrelationId);
|
||||
// Stored as the AuditOutcome enum member name (mirrors AdminRole's string storage):
|
||||
// "Success" | "Failure" | "Denied" all fit nvarchar(16). Nullable for legacy + SP-path rows.
|
||||
e.Property(x => x.Outcome).HasMaxLength(16);
|
||||
|
||||
e.HasIndex(x => new { x.ClusterId, x.Timestamp })
|
||||
.IsDescending(false, true)
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Queries;
|
||||
|
||||
/// <summary>
/// Shared query for the cluster-scoped audit view. Audit rows reach <c>ConfigAuditLog</c> by two
/// paths that stamp different columns:
/// <list type="bullet">
///   <item>the bespoke stored-procedure path stamps <c>ClusterId</c> directly;</item>
///   <item>the structured <c>AuditWriterActor</c> path stamps <c>NodeId</c> (leaving
///   <c>ClusterId</c> null).</item>
/// </list>
/// A cluster-scoped view must surface both, so this query matches rows whose <c>ClusterId</c>
/// equals the cluster <em>or</em> whose <c>NodeId</c> belongs to a node in the cluster
/// (membership from <see cref="ClusterNode"/>: <c>NodeId → ClusterId</c>).
/// </summary>
public static class ClusterAuditQuery
{
    /// <summary>
    /// Returns the newest <paramref name="pageSize"/> audit rows visible for
    /// <paramref name="clusterId"/>, newest first. Executes one query to resolve the cluster's
    /// node IDs, then one filtered query against <c>ConfigAuditLog</c>. Both queries are
    /// no-tracking.
    /// </summary>
    /// <param name="db">The config database context.</param>
    /// <param name="clusterId">The cluster whose audit rows to fetch.</param>
    /// <param name="pageSize">Maximum number of rows to return. A value of zero or less yields an
    /// empty list without querying the database.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The matching audit rows, newest first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="db"/> is <see langword="null"/>.</exception>
    public static async Task<List<ConfigAuditLog>> ForClusterAsync(
        OtOpcUaConfigDbContext db, string clusterId, int pageSize, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(db);

        // LINQ's Take yields nothing for a non-positive count. Answering that case here avoids
        // two pointless round trips and avoids handing a negative row count to the database
        // (NOTE(review): relational providers are expected to reject that — confirm per provider).
        if (pageSize <= 0)
        {
            return [];
        }

        var nodeIds = await db.ClusterNodes.AsNoTracking()
            .Where(n => n.ClusterId == clusterId)
            .Select(n => n.NodeId)
            .ToListAsync(ct);

        return await db.ConfigAuditLogs.AsNoTracking()
            // Rows stamped with the cluster directly, plus node-stamped rows (no ClusterId)
            // whose node belongs to the cluster.
            .Where(a => a.ClusterId == clusterId
                || (a.ClusterId == null && a.NodeId != null && nodeIds.Contains(a.NodeId)))
            .OrderByDescending(a => a.Timestamp)
            .Take(pageSize)
            .ToListAsync(ct);
    }
}
|
||||
@@ -0,0 +1,109 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.AdminUI.Api;
|
||||
|
||||
/// <summary>
|
||||
/// Headless REST gateway over the admin-operations cluster singleton, for CI / scripts that
|
||||
/// need to trigger a deployment without driving the Blazor AdminUI. Mounted on admin-role nodes
|
||||
/// only (where <see cref="IAdminOperationsClient"/> + the singleton proxy live), it forwards to
|
||||
/// the same <see cref="IAdminOperationsClient.StartDeploymentAsync"/> the "Deploy current
|
||||
/// configuration" button calls.
|
||||
/// <para>
|
||||
/// Auth is a single configured secret (<c>Security:DeployApiKey</c>) checked from the
|
||||
/// <c>X-Api-Key</c> header in fixed time — deliberately orthogonal to the cookie-only web
|
||||
/// auth so automation needs no LDAP login round-trip. The endpoint is <c>AllowAnonymous</c>
|
||||
/// (so the global <c>RequireAuthenticatedUser</c> fallback doesn't 401 it) and enforces the
|
||||
/// key itself; when the key is unconfigured the endpoint is disabled (503), so it is never
|
||||
/// open.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public static class DeployApiEndpoints
|
||||
{
|
||||
/// <summary>Header carrying the deploy API key.</summary>
|
||||
public const string ApiKeyHeader = "X-Api-Key";
|
||||
/// <summary>Configuration key holding the deploy API secret.</summary>
|
||||
public const string ConfigKey = "Security:DeployApiKey";
|
||||
|
||||
/// <summary>Registers <c>POST /api/deployments</c>, the headless deploy trigger. The handler
/// answers 503 while <c>Security:DeployApiKey</c> is empty, 401 when the <c>X-Api-Key</c> header
/// does not match, and otherwise forwards to
/// <see cref="IAdminOperationsClient.StartDeploymentAsync"/> and maps the outcome via
/// <see cref="ToResult"/>.</summary>
/// <param name="app">The endpoint route builder.</param>
/// <param name="config">Application configuration (read for the deploy API key).</param>
/// <returns>The same <paramref name="app"/> for chaining.</returns>
public static IEndpointRouteBuilder MapOtOpcUaDeployApi(this IEndpointRouteBuilder app, IConfiguration config)
{
    // Read once while mapping; the handler closes over this value.
    var deployKey = config[ConfigKey];

    var endpoint = app.MapPost("/api/deployments", async (
        HttpContext http,
        DeployRequest? body,
        IAdminOperationsClient adminOps,
        CancellationToken ct) =>
    {
        if (string.IsNullOrEmpty(deployKey))
        {
            return Results.Problem(
                "Deploy API is disabled. Set Security:DeployApiKey to enable it.",
                statusCode: StatusCodes.Status503ServiceUnavailable);
        }

        var suppliedKey = http.Request.Headers[ApiKeyHeader].ToString();
        if (!IsAuthorized(suppliedKey, deployKey))
        {
            return Results.Unauthorized();
        }

        // The body is optional; fall back to a fixed initiator name when none is supplied.
        var requestedBy = body?.CreatedBy;
        var createdBy = string.IsNullOrWhiteSpace(requestedBy) ? "rest-api" : requestedBy.Trim();

        return ToResult(await adminOps.StartDeploymentAsync(createdBy, ct));
    });

    endpoint.AllowAnonymous();      // gated by the API key, not the cookie auth fallback
    endpoint.DisableAntiforgery();  // machine endpoint, not a browser form post
    endpoint.WithName("StartDeployment");

    return app;
}
|
||||
|
||||
/// <summary>Fixed-time compare of the supplied key against the configured secret. False when
/// either is empty (so an unconfigured key never authorizes).</summary>
/// <remarks>Both values are reduced to SHA-256 digests before the comparison.
/// <see cref="CryptographicOperations.FixedTimeEquals"/> is only fixed-time for buffers of equal
/// length, so comparing the raw UTF-8 bytes would let the response time reveal whether the
/// caller guessed the secret's length; the digests are always the same size.</remarks>
/// <param name="provided">The caller-supplied key (from the <c>X-Api-Key</c> header).</param>
/// <param name="configuredKey">The configured secret.</param>
/// <returns><see langword="true"/> only when both are non-empty and equal.</returns>
public static bool IsAuthorized(string? provided, string? configuredKey)
{
    if (string.IsNullOrEmpty(configuredKey) || string.IsNullOrEmpty(provided))
    {
        return false;
    }

    byte[] providedDigest = SHA256.HashData(Encoding.UTF8.GetBytes(provided));
    byte[] configuredDigest = SHA256.HashData(Encoding.UTF8.GetBytes(configuredKey));

    return CryptographicOperations.FixedTimeEquals(providedDigest, configuredDigest);
}
|
||||
|
||||
/// <summary>Translates a <see cref="StartDeploymentResult"/> into the HTTP response for the
/// deploy trigger: Accepted → 202 with a <c>Location</c> of <c>/api/deployments/{id}</c>,
/// NoChanges → 200, AnotherDeploymentInFlight → 409, Rejected → 422 (problem details carrying
/// the result's message, or a default text), any other outcome → 500.</summary>
/// <param name="r">The deployment outcome from the admin-operations singleton.</param>
/// <returns>The corresponding HTTP result.</returns>
public static IResult ToResult(StartDeploymentResult r)
{
    switch (r.Outcome)
    {
        case StartDeploymentOutcome.Accepted:
            return Results.Accepted($"/api/deployments/{r.DeploymentId?.Value}", Describe(r));

        case StartDeploymentOutcome.NoChanges:
            return Results.Ok(Describe(r));

        case StartDeploymentOutcome.AnotherDeploymentInFlight:
            // A conflict never reports an id or revision hash, whatever the result carries.
            return Results.Conflict(
                new DeployResponse(r.Outcome.ToString(), DeploymentId: null, RevisionHash: null));

        case StartDeploymentOutcome.Rejected:
            return Results.Problem(
                r.Message ?? "Deployment rejected.",
                statusCode: StatusCodes.Status422UnprocessableEntity);

        default:
            return Results.Problem(
                $"Unexpected deployment outcome: {r.Outcome}",
                statusCode: StatusCodes.Status500InternalServerError);
    }

    // Response body shared by the outcomes that echo the deployment id and revision hash.
    static DeployResponse Describe(StartDeploymentResult result) =>
        new(result.Outcome.ToString(), result.DeploymentId?.Value, result.RevisionHash?.Value);
}
|
||||
|
||||
/// <summary>Optional request body. <c>CreatedBy</c> is recorded as the deployment's initiator
|
||||
/// (audit); defaults to <c>"rest-api"</c> when absent.</summary>
|
||||
/// <param name="CreatedBy">Who/what initiated the deploy (for the audit trail).</param>
|
||||
public sealed record DeployRequest(string? CreatedBy);
|
||||
|
||||
/// <summary>Response body for a deploy trigger.</summary>
|
||||
/// <param name="Outcome">The <see cref="StartDeploymentOutcome"/> name.</param>
|
||||
/// <param name="DeploymentId">The new deployment id when one was created; otherwise null.</param>
|
||||
/// <param name="RevisionHash">The config revision hash, when known.</param>
|
||||
public sealed record DeployResponse(string Outcome, Guid? DeploymentId, string? RevisionHash);
|
||||
}
|
||||
@@ -14,14 +14,14 @@
|
||||
<title>OtOpcUa Admin</title>
|
||||
<base href="/"/>
|
||||
<link rel="stylesheet" href="_content/ZB.MOM.WW.OtOpcUa.AdminUI/lib/bootstrap/css/bootstrap.min.css"/>
|
||||
<link rel="stylesheet" href="_content/ZB.MOM.WW.OtOpcUa.AdminUI/css/theme.css"/>
|
||||
<ThemeHead />
|
||||
<link rel="stylesheet" href="_content/ZB.MOM.WW.OtOpcUa.AdminUI/css/site.css"/>
|
||||
<HeadOutlet/>
|
||||
</head>
|
||||
<body>
|
||||
<Routes/>
|
||||
<script src="_content/ZB.MOM.WW.OtOpcUa.AdminUI/lib/bootstrap/js/bootstrap.bundle.min.js"></script>
|
||||
<script src="_content/ZB.MOM.WW.OtOpcUa.AdminUI/js/nav-state.js"></script>
|
||||
<ThemeScripts />
|
||||
<script src="_framework/blazor.web.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -1,28 +1,55 @@
|
||||
@inherits LayoutComponentBase
|
||||
@using System.Security.Claims
|
||||
|
||||
@* Layout chrome ported from ScadaLink CentralUI: no separate top bar — brand sits
|
||||
at the top of the side rail. The sidebar itself is the interactive island
|
||||
(<NavSidebar/>); MainLayout stays statically rendered so the Body RenderFragment
|
||||
doesn't have to cross an interactive boundary. *@
|
||||
@* Thin delegation to the shared ZB.MOM.WW.Theme side-rail chassis. ThemeShell owns
|
||||
the brand bar, the CSS-only narrow-viewport hamburger, and the responsive collapse,
|
||||
so MainLayout no longer carries its own .app-shell / hamburger wrapper. Nav sections
|
||||
are static <details> (NavRailSection) whose expand state is persisted to localStorage
|
||||
by the kit's <ThemeScripts/> (emitted in App.razor) — replacing the old interactive
|
||||
NavSidebar island + cookie/URL auto-expand. *@
|
||||
|
||||
<div class="app-shell d-flex flex-column flex-lg-row">
|
||||
@* Hamburger toggle: visible only on viewports <lg.
|
||||
Bootstrap collapse JS lives in bootstrap.bundle.min.js (loaded in App.razor). *@
|
||||
<button class="btn btn-outline-secondary btn-sm d-lg-none m-2 align-self-start"
|
||||
type="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sidebar-collapse"
|
||||
aria-controls="sidebar-collapse"
|
||||
aria-expanded="false"
|
||||
aria-label="Toggle navigation">
|
||||
☰
|
||||
</button>
|
||||
|
||||
<div class="collapse d-lg-block" id="sidebar-collapse">
|
||||
<NavSidebar />
|
||||
</div>
|
||||
|
||||
<main class="page">
|
||||
@Body
|
||||
</main>
|
||||
</div>
|
||||
<ThemeShell Product="OtOpcUa" Accent="#2f5fd0">
|
||||
<Nav>
|
||||
<NavRailSection Title="Navigation" Key="nav">
|
||||
<NavRailItem Href="/" Text="Overview" Match="NavLinkMatch.All" />
|
||||
<NavRailItem Href="/fleet" Text="Fleet status" />
|
||||
<NavRailItem Href="/hosts" Text="Host status" />
|
||||
<NavRailItem Href="/clusters" Text="Clusters" />
|
||||
<NavRailItem Href="/reservations" Text="Reservations" />
|
||||
<NavRailItem Href="/certificates" Text="Certificates" />
|
||||
<NavRailItem Href="/role-grants" Text="Role grants" />
|
||||
</NavRailSection>
|
||||
<NavRailSection Title="Scripting" Key="scripting">
|
||||
<NavRailItem Href="/virtual-tags" Text="Virtual tags" />
|
||||
<NavRailItem Href="/scripted-alarms" Text="Scripted alarms" />
|
||||
<NavRailItem Href="/scripts" Text="Scripts" />
|
||||
<NavRailItem Href="/script-log" Text="Script log" />
|
||||
</NavRailSection>
|
||||
<NavRailSection Title="Live" Key="live">
|
||||
<NavRailItem Href="/deployments" Text="Deployments" />
|
||||
<NavRailItem Href="/alerts" Text="Alerts" />
|
||||
<NavRailItem Href="/alarms-historian" Text="Alarms historian" />
|
||||
</NavRailSection>
|
||||
</Nav>
|
||||
<RailFooter>
|
||||
<AuthorizeView>
|
||||
<Authorized>
|
||||
<div class="rail-eyebrow">Session</div>
|
||||
<a class="rail-user" href="/account">@context.User.Identity?.Name</a>
|
||||
<div class="rail-roles">
|
||||
@string.Join(", ", context.User.Claims
|
||||
.Where(c => c.Type == ClaimTypes.Role).Select(c => c.Value))
|
||||
</div>
|
||||
<form method="post" action="/auth/logout">
|
||||
<AntiforgeryToken />
|
||||
<button class="rail-btn" type="submit">Sign out</button>
|
||||
</form>
|
||||
</Authorized>
|
||||
<NotAuthorized>
|
||||
<div class="rail-eyebrow">Session</div>
|
||||
<a class="rail-btn" href="/login">Sign in</a>
|
||||
</NotAuthorized>
|
||||
</AuthorizeView>
|
||||
</RailFooter>
|
||||
<ChildContent>@Body</ChildContent>
|
||||
</ThemeShell>
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
@* A collapsible sidebar nav section: an uppercase-eyebrow button that toggles
|
||||
the visibility of its child nav items. Mirrors the ScadaLink NavSection at
|
||||
/Users/dohertj2/Desktop/scadalink-design/src/ScadaLink.CentralUI/Components/Layout/NavSection.razor
|
||||
but uses OtOpcUa's rail-eyebrow + rail-link classes. *@
|
||||
|
||||
<button type="button"
|
||||
class="rail-eyebrow-toggle"
|
||||
@onclick="OnToggle"
|
||||
aria-expanded="@(Expanded ? "true" : "false")">
|
||||
<span class="rail-eyebrow-chevron">@(Expanded ? "▼" : "▶")</span>
|
||||
<span class="rail-eyebrow-label">@Title</span>
|
||||
</button>
|
||||
@if (Expanded)
|
||||
{
|
||||
<div class="rail-section-body">
|
||||
@ChildContent
|
||||
</div>
|
||||
}
|
||||
|
||||
@code {
|
||||
/// <summary>Section label shown in the eyebrow (e.g. "Scripting").</summary>
|
||||
[Parameter, EditorRequired]
|
||||
public string Title { get; set; } = string.Empty;
|
||||
|
||||
/// <summary>Whether the section is expanded — its child links rendered.</summary>
|
||||
[Parameter]
|
||||
public bool Expanded { get; set; }
|
||||
|
||||
/// <summary>Raised when the eyebrow button is clicked.</summary>
|
||||
[Parameter]
|
||||
public EventCallback OnToggle { get; set; }
|
||||
|
||||
/// <summary>The section's child nav links, rendered only while expanded.</summary>
|
||||
[Parameter]
|
||||
public RenderFragment? ChildContent { get; set; }
|
||||
}
|
||||
@@ -1,160 +0,0 @@
|
||||
@rendermode InteractiveServer
|
||||
@using Microsoft.AspNetCore.Components.Routing
|
||||
@using Microsoft.JSInterop
|
||||
@implements IDisposable
|
||||
@inject NavigationManager Navigation
|
||||
@inject IJSRuntime JS
|
||||
|
||||
@* Interactive sidebar — extracted from MainLayout so the layout itself can stay
|
||||
statically rendered (layouts can't take RenderFragment Body across an interactive
|
||||
boundary). Hosts the collapsible NavSection groups and cookie persistence. *@
|
||||
|
||||
<nav class="side-rail">
|
||||
<div class="brand"><span class="mark">▮</span> OtOpcUa</div>
|
||||
|
||||
<NavSection Title="Navigation"
|
||||
Expanded="@_expanded.Contains("nav")"
|
||||
OnToggle="@(() => ToggleAsync("nav"))">
|
||||
<NavLink class="rail-link" href="/" Match="NavLinkMatch.All">Overview</NavLink>
|
||||
<NavLink class="rail-link" href="/fleet" Match="NavLinkMatch.Prefix">Fleet status</NavLink>
|
||||
<NavLink class="rail-link" href="/hosts" Match="NavLinkMatch.Prefix">Host status</NavLink>
|
||||
<NavLink class="rail-link" href="/clusters" Match="NavLinkMatch.Prefix">Clusters</NavLink>
|
||||
<NavLink class="rail-link" href="/reservations" Match="NavLinkMatch.Prefix">Reservations</NavLink>
|
||||
<NavLink class="rail-link" href="/certificates" Match="NavLinkMatch.Prefix">Certificates</NavLink>
|
||||
<NavLink class="rail-link" href="/role-grants" Match="NavLinkMatch.Prefix">Role grants</NavLink>
|
||||
</NavSection>
|
||||
|
||||
<NavSection Title="Scripting"
|
||||
Expanded="@_expanded.Contains("scripting")"
|
||||
OnToggle="@(() => ToggleAsync("scripting"))">
|
||||
<NavLink class="rail-link" href="/virtual-tags" Match="NavLinkMatch.Prefix">Virtual tags</NavLink>
|
||||
<NavLink class="rail-link" href="/scripted-alarms" Match="NavLinkMatch.Prefix">Scripted alarms</NavLink>
|
||||
<NavLink class="rail-link" href="/scripts" Match="NavLinkMatch.Prefix">Scripts</NavLink>
|
||||
<NavLink class="rail-link" href="/script-log" Match="NavLinkMatch.Prefix">Script log</NavLink>
|
||||
</NavSection>
|
||||
|
||||
<NavSection Title="Live"
|
||||
Expanded="@_expanded.Contains("live")"
|
||||
OnToggle="@(() => ToggleAsync("live"))">
|
||||
<NavLink class="rail-link" href="/deployments" Match="NavLinkMatch.Prefix">Deployments</NavLink>
|
||||
<NavLink class="rail-link" href="/alerts" Match="NavLinkMatch.Prefix">Alerts</NavLink>
|
||||
<NavLink class="rail-link" href="/alarms-historian" Match="NavLinkMatch.Prefix">Alarms historian</NavLink>
|
||||
</NavSection>
|
||||
|
||||
<div class="rail-foot">
|
||||
<AuthorizeView>
|
||||
<Authorized>
|
||||
<div class="rail-eyebrow">Session</div>
|
||||
<a class="rail-user" href="/account">@context.User.Identity?.Name</a>
|
||||
<div class="rail-roles">
|
||||
@string.Join(", ", context.User.Claims
|
||||
.Where(c => c.Type.EndsWith("/role")).Select(c => c.Value))
|
||||
</div>
|
||||
<form method="post" action="/auth/logout">
|
||||
<AntiforgeryToken />
|
||||
<button class="rail-btn" type="submit">Sign out</button>
|
||||
</form>
|
||||
</Authorized>
|
||||
<NotAuthorized>
|
||||
<div class="rail-eyebrow">Session</div>
|
||||
<a class="rail-btn" href="/login">Sign in</a>
|
||||
</NotAuthorized>
|
||||
</AuthorizeView>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
@code {
|
||||
// Expanded-section state persists in the `otopcua_nav` cookie via
|
||||
// wwwroot/js/nav-state.js (window.navState.get/.set). Same pattern as
|
||||
// ScadaLink CentralUI's NavMenu.
|
||||
|
||||
private static readonly string[] SectionIds = { "nav", "scripting", "live" };
|
||||
|
||||
private readonly HashSet<string> _expanded = new(StringComparer.Ordinal);
|
||||
|
||||
protected override void OnInitialized()
|
||||
{
|
||||
Navigation.LocationChanged += OnLocationChanged;
|
||||
// Seed from the URL so the current page's section is expanded on the
|
||||
// initial render — works even before JS interop is ready.
|
||||
EnsureCurrentSectionExpanded();
|
||||
}
|
||||
|
||||
protected override async Task OnAfterRenderAsync(bool firstRender)
|
||||
{
|
||||
if (!firstRender) return;
|
||||
|
||||
string saved;
|
||||
try
|
||||
{
|
||||
saved = await JS.InvokeAsync<string>("navState.get") ?? string.Empty;
|
||||
}
|
||||
catch (JSDisconnectedException) { return; }
|
||||
catch (InvalidOperationException) { return; }
|
||||
|
||||
foreach (var id in saved.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
|
||||
{
|
||||
if (Array.IndexOf(SectionIds, id) >= 0)
|
||||
_expanded.Add(id);
|
||||
}
|
||||
|
||||
if (EnsureCurrentSectionExpanded())
|
||||
await PersistAsync();
|
||||
|
||||
StateHasChanged();
|
||||
}
|
||||
|
||||
private void OnLocationChanged(object? sender, LocationChangedEventArgs e)
|
||||
{
|
||||
if (EnsureCurrentSectionExpanded())
|
||||
{
|
||||
_ = PersistAsync();
|
||||
_ = InvokeAsync(StateHasChanged);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task ToggleAsync(string id)
|
||||
{
|
||||
if (!_expanded.Remove(id))
|
||||
_expanded.Add(id);
|
||||
await PersistAsync();
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds the section owning the current URL to the expanded set.
/// </summary>
/// <returns>
/// <c>true</c> only when a section was resolved and it was not already expanded;
/// otherwise <c>false</c>.
/// </returns>
private bool EnsureCurrentSectionExpanded()
{
    if (CurrentSection() is not { } section)
    {
        return false;
    }

    // HashSet.Add reports whether the set actually changed.
    return _expanded.Add(section);
}
|
||||
|
||||
/// <summary>
/// Resolves the nav section that owns the current URL from its first path segment.
/// </summary>
/// <returns>
/// <c>"nav"</c>, <c>"scripting"</c> or <c>"live"</c> for a recognised segment,
/// <c>"nav"</c> when the URL has no path segment (root), or <c>null</c> when the
/// segment belongs to no section.
/// </returns>
/// <remarks>
/// Segments are compared case-insensitively so that a URL such as <c>/Hosts</c>
/// resolves to the same section as <c>/hosts</c>.
/// </remarks>
private string? CurrentSection()
{
    var relative = Navigation.ToBaseRelativePath(Navigation.Uri);

    // Drop the query string / fragment first, then take the leading path segment.
    var firstSegment = relative.Split('?', '#')[0]
        .Split('/', StringSplitOptions.RemoveEmptyEntries)
        .FirstOrDefault();

    if (string.IsNullOrEmpty(firstSegment))
    {
        // Root URL.
        return "nav";
    }

    if (IsAnyOf(firstSegment, "fleet", "hosts", "clusters", "reservations", "certificates", "role-grants"))
    {
        return "nav";
    }

    if (IsAnyOf(firstSegment, "virtual-tags", "scripted-alarms", "scripts", "script-log"))
    {
        return "scripting";
    }

    if (IsAnyOf(firstSegment, "deployments", "alerts", "alarms-historian"))
    {
        return "live";
    }

    return null;

    // Ordinal, case-insensitive membership test against a fixed list of route segments.
    static bool IsAnyOf(string segment, params string[] candidates) =>
        Array.Exists(candidates, c => string.Equals(c, segment, StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// Saves the expanded-section ids, comma-separated, through <c>navState.set</c>.
/// </summary>
/// <remarks>
/// Best-effort: a <see cref="JSDisconnectedException"/> or
/// <see cref="InvalidOperationException"/> from the interop call is swallowed.
/// </remarks>
private async Task PersistAsync()
{
    try
    {
        var serialized = string.Join(',', _expanded);
        await JS.InvokeVoidAsync("navState.set", serialized);
    }
    catch (Exception ex) when (ex is JSDisconnectedException or InvalidOperationException)
    {
        // Intentionally ignored — losing one save of the nav state is harmless.
    }
}
|
||||
|
||||
/// <summary>Detaches the navigation handler attached in <c>OnInitialized</c>.</summary>
public void Dispose() => Navigation.LocationChanged -= OnLocationChanged;
|
||||
}
|
||||
@@ -41,7 +41,7 @@ else
|
||||
<div class="col-md-6 mb-3">
|
||||
<label class="form-label" for="grp">LDAP group</label>
|
||||
<InputText id="grp" @bind-Value="_form.LdapGroup" class="form-control form-control-sm mono"
|
||||
placeholder="cn=Operators,ou=FleetAdmin,dc=lmxopcua,dc=local" />
|
||||
placeholder="cn=Operators,ou=FleetAdmin,dc=zb,dc=local" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
@using Microsoft.EntityFrameworkCore
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Entities
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Queries
|
||||
@inject IDbContextFactory<OtOpcUaConfigDbContext> DbFactory
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
@@ -74,10 +75,8 @@ else
|
||||
protected override async Task OnInitializedAsync()
|
||||
{
|
||||
await using var db = await DbFactory.CreateDbContextAsync();
|
||||
_rows = await db.ConfigAuditLogs.AsNoTracking()
|
||||
.Where(a => a.ClusterId == ClusterId)
|
||||
.OrderByDescending(a => a.Timestamp)
|
||||
.Take(PageSize)
|
||||
.ToListAsync();
|
||||
// Shared query: matches both the SP path (stamps ClusterId) and the structured
|
||||
// AuditWriterActor path (stamps NodeId, ClusterId null) so the latter's rows are visible.
|
||||
_rows = await ClusterAuditQuery.ForClusterAsync(db, ClusterId, PageSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Enums
|
||||
@using ZB.MOM.WW.OtOpcUa.ControlPlane.AdminOperations
|
||||
|
||||
@attribute [Authorize(Roles = "FleetAdmin,ConfigEditor")]
|
||||
@attribute [Authorize(Roles = "Administrator,Designer")]
|
||||
|
||||
@inject IDbContextFactory<OtOpcUaConfigDbContext> DbFactory
|
||||
@inject IAdminOperationsClient AdminOps
|
||||
|
||||
@@ -5,39 +5,14 @@
|
||||
the form POSTs to /auth/login while ASP.NET still owns an unstarted HTTP response.
|
||||
Calling SignInAsync from an interactive circuit would be too late.
|
||||
|
||||
Uses LoginLayout (no side rail) so the page renders as a clean centred card. *@
|
||||
Uses LoginLayout (no side rail) so the page renders as a clean centred card.
|
||||
The card itself is the shared kit's <LoginCard> — it owns the .login-wrap centring
|
||||
wrapper, the .panel shell, and the static form-POST (username/password/returnUrl). *@
|
||||
@attribute [Microsoft.AspNetCore.Authorization.AllowAnonymous]
|
||||
|
||||
<div class="login-wrap rise" style="animation-delay:.02s">
|
||||
<section class="panel">
|
||||
<div style="padding:1.4rem 1.1rem 1.25rem">
|
||||
<h1 class="login-title">OtOpcUa Admin — sign in</h1>
|
||||
<form method="post" action="/auth/login" data-enhance="false">
|
||||
@if (ReturnUrl is not null)
|
||||
{
|
||||
<input type="hidden" name="returnUrl" value="@ReturnUrl"/>
|
||||
}
|
||||
<div class="mb-3">
|
||||
<label class="form-label" for="username">Username</label>
|
||||
<input id="username" name="username" type="text"
|
||||
class="form-control form-control-sm" autocomplete="username"/>
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<label class="form-label" for="password">Password</label>
|
||||
<input id="password" name="password" type="password"
|
||||
class="form-control form-control-sm" autocomplete="current-password"/>
|
||||
</div>
|
||||
|
||||
@if (!string.IsNullOrWhiteSpace(Error))
|
||||
{
|
||||
<div class="panel notice" style="margin-bottom:.85rem">@Error</div>
|
||||
}
|
||||
|
||||
<button class="btn btn-primary w-100" type="submit">Sign in</button>
|
||||
</form>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<LoginCard Product="OtOpcUa Admin" Action="/auth/login" ReturnUrl="@ReturnUrl" Error="@Error">
|
||||
<AntiforgeryToken />
|
||||
</LoginCard>
|
||||
|
||||
@code {
|
||||
/// <summary>Error message surfaced by /auth/login after a failed bind.</summary>
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
<div class="panel-head">LDAP binding</div>
|
||||
<div class="kv"><span class="k">Enabled</span><span class="v">@(_options.Enabled ? "yes" : "no")</span></div>
|
||||
<div class="kv"><span class="k">Server</span><span class="v mono">@_options.Server:@_options.Port</span></div>
|
||||
<div class="kv"><span class="k">UseTls</span><span class="v">@_options.UseTls</span></div>
|
||||
<div class="kv"><span class="k">Transport</span><span class="v">@_options.Transport</span></div>
|
||||
<div class="kv"><span class="k">SearchBase</span><span class="v mono small">@_options.SearchBase</span></div>
|
||||
@if (!_options.UseTls && _options.AllowInsecureLdap)
|
||||
@if (_options.Transport == ZB.MOM.WW.Auth.Abstractions.Ldap.LdapTransport.None && _options.AllowInsecure)
|
||||
{
|
||||
<div class="kv"><span class="k">Warning</span><span class="v"><span class="chip chip-alert">Plaintext credentials over LDAP — dev mode only</span></span></div>
|
||||
}
|
||||
@@ -108,7 +108,7 @@
|
||||
private LdapOptions? _options;
|
||||
private IReadOnlyList<LdapGroupRoleMapping> _rows = [];
|
||||
private string _newGroup = "";
|
||||
private AdminRole _newRole = AdminRole.ConfigViewer;
|
||||
private AdminRole _newRole = AdminRole.Viewer;
|
||||
private string? _error;
|
||||
private bool _busy;
|
||||
|
||||
@@ -134,7 +134,7 @@
|
||||
LdapGroup = _newGroup.Trim(), Role = _newRole, IsSystemWide = true, ClusterId = null,
|
||||
}, default);
|
||||
_newGroup = "";
|
||||
_newRole = AdminRole.ConfigViewer;
|
||||
_newRole = AdminRole.Viewer;
|
||||
await ReloadAsync();
|
||||
}
|
||||
catch (Exception ex) { _error = ex.Message; }
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
@* Status chip — wraps the theme.css .chip / .chip-ok / .chip-warn / .chip-bad / .chip-idle classes. *@
|
||||
<span class="chip @CssClass">@Text</span>
|
||||
|
||||
@code {
    /// <summary>Text rendered inside the chip's <c>span</c>.</summary>
    [Parameter]
    public string Text { get; set; } = "";

    /// <summary>
    /// CSS class emitted after the fixed <c>chip</c> class; defaults to <c>chip-idle</c>.
    /// </summary>
    [Parameter]
    public string CssClass { get; set; } = "chip-idle";
}
|
||||
@@ -9,6 +9,7 @@
|
||||
<ItemGroup>
|
||||
<FrameworkReference Include="Microsoft.AspNetCore.App"/>
|
||||
<PackageReference Include="Microsoft.AspNetCore.SignalR.Client"/>
|
||||
<PackageReference Include="ZB.MOM.WW.Theme"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -7,3 +7,4 @@
|
||||
@using Microsoft.JSInterop
|
||||
@using ZB.MOM.WW.OtOpcUa.AdminUI.Components.Shared
|
||||
@using ZB.MOM.WW.OtOpcUa.AdminUI.Components.Layout
|
||||
@using ZB.MOM.WW.Theme
|
||||
|
||||
@@ -1,77 +1,11 @@
|
||||
/* OtOpcUa Admin — view-specific layer over the technical-light theme (theme.css).
|
||||
Tokens live in theme.css; this sheet only carries layout + the side rail. */
|
||||
|
||||
/* ── App shell: side rail + page ─────────────────────────────────────────── */
|
||||
/* The outer flex direction is supplied by Bootstrap utilities on the wrapper
|
||||
(`d-flex flex-column flex-lg-row`) so the mobile hamburger row stacks above
|
||||
the rail on <lg viewports and the rail sits beside the page on lg+. */
|
||||
.app-shell {
|
||||
align-items: stretch;
|
||||
min-height: calc(100vh - 3.3rem);
|
||||
}
|
||||
|
||||
.app-shell .page {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
/* ── Side rail ───────────────────────────────────────────────────────────── */
|
||||
.side-rail {
|
||||
width: 220px;
|
||||
flex: 0 0 220px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.15rem;
|
||||
padding: 1rem 0.7rem;
|
||||
background: var(--card);
|
||||
border-right: 1px solid var(--rule-strong);
|
||||
}
|
||||
|
||||
/* On lg+ keep the side rail pinned so it stays visible when content scrolls. */
|
||||
@media (min-width: 992px) {
|
||||
#sidebar-collapse {
|
||||
position: sticky;
|
||||
top: 0;
|
||||
height: 100vh;
|
||||
align-self: flex-start;
|
||||
z-index: 1020;
|
||||
}
|
||||
}
|
||||
|
||||
/* When the side rail is collapsed under <lg viewports the Bootstrap collapse
|
||||
container removes the fixed width; restore full width on mobile. */
|
||||
@media (max-width: 991.98px) {
|
||||
.side-rail {
|
||||
width: 100%;
|
||||
min-width: 100%;
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
}
|
||||
|
||||
/* Login card title. Replaces the panel-head top strip on the login page so the
|
||||
card reads as a self-contained sign-in form, not a tabbed panel. */
|
||||
.login-title {
|
||||
margin: 0 0 1.1rem 0;
|
||||
font-size: 1.05rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.01em;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
/* Brand block pinned at the top of the side rail. Mirrors ScadaLink's
|
||||
.sidebar .brand styling — used now that the top app-bar was dropped. */
|
||||
.side-rail .brand {
|
||||
color: var(--ink);
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.02em;
|
||||
padding: 1rem;
|
||||
border-bottom: 1px solid var(--rule);
|
||||
margin-bottom: 0.4rem;
|
||||
}
|
||||
.side-rail .brand .mark { color: var(--accent); }
|
||||
/* OtOpcUa Admin — app-only view layer over the shared ZB.MOM.WW.Theme kit.
|
||||
The kit's theme.css (tokens) + layout.css (side-rail shell, login card) are
|
||||
linked via <ThemeHead/> and own all of the shell/rail/login styling. This
|
||||
sheet carries only the few rules the kit does not provide. */
|
||||
|
||||
/* Static (non-collapsible) eyebrow label — used for the "Session" caption in the
|
||||
rail footer. The kit styles a collapsible section <summary> as
|
||||
.rail-eyebrow-toggle; a plain inline label needs this lighter rule. */
|
||||
.rail-eyebrow {
|
||||
font-size: 0.68rem;
|
||||
font-weight: 600;
|
||||
@@ -81,94 +15,11 @@
|
||||
padding: 0.3rem 0.6rem;
|
||||
}
|
||||
|
||||
/* Collapsible variant — rendered by NavSection.razor. Looks like .rail-eyebrow
|
||||
plus a leading chevron; clicking flips chevron + expanded state. */
|
||||
.rail-eyebrow-toggle {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
width: 100%;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
text-align: left;
|
||||
font-size: 0.68rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--ink-faint);
|
||||
padding: 0.45rem 0.6rem 0.3rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.rail-eyebrow-toggle:hover { color: var(--ink); }
|
||||
.rail-eyebrow-chevron {
|
||||
display: inline-block;
|
||||
width: 0.7rem;
|
||||
font-size: 0.55rem;
|
||||
color: var(--ink-faint);
|
||||
}
|
||||
.rail-section-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.rail-link {
|
||||
display: block;
|
||||
padding: 0.4rem 0.6rem;
|
||||
border-radius: 4px;
|
||||
border-left: 2px solid transparent;
|
||||
font-size: 0.86rem;
|
||||
color: var(--ink-soft);
|
||||
}
|
||||
.rail-link:hover {
|
||||
background: #f3f6fd;
|
||||
color: var(--ink);
|
||||
text-decoration: none;
|
||||
}
|
||||
.rail-link.active {
|
||||
background: #eef2fc;
|
||||
border-left-color: var(--accent);
|
||||
color: var(--accent-deep);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* ── Session block, pinned to the rail foot ──────────────────────────────── */
|
||||
.rail-foot {
|
||||
margin-top: auto;
|
||||
padding-top: 0.6rem;
|
||||
border-top: 1px solid var(--rule);
|
||||
}
|
||||
.rail-user {
|
||||
display: block;
|
||||
padding: 0 0.6rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.88rem;
|
||||
}
|
||||
.rail-roles {
|
||||
padding: 0.1rem 0.6rem 0.5rem;
|
||||
font-family: var(--mono);
|
||||
font-size: 0.72rem;
|
||||
color: var(--ink-faint);
|
||||
}
|
||||
.rail-btn {
|
||||
display: inline-block;
|
||||
margin: 0 0.6rem;
|
||||
padding: 0.3rem 0.7rem;
|
||||
font-size: 0.78rem;
|
||||
font-weight: 600;
|
||||
color: var(--ink-soft);
|
||||
background: var(--card);
|
||||
border: 1px solid var(--rule-strong);
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
.rail-btn:hover {
|
||||
border-color: var(--accent);
|
||||
color: var(--accent);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* ── Login card centring ─────────────────────────────────────────────────── */
|
||||
.login-wrap {
|
||||
max-width: 380px;
|
||||
margin: 3.5rem auto 0;
|
||||
}
|
||||
/* OtOpcUa domain pages (Alerts, ScriptLog, Fleet, Hosts, AlarmsHistorian,
|
||||
RoleGrants, ImportEquipment) use two extra status-chip variants on top of the
|
||||
kit's .chip base + .chip-ok/.chip-warn/.chip-bad/.chip-idle/.chip-info set.
|
||||
.chip-alert is the red/danger variant (mirrors the kit's .chip-bad);
|
||||
.chip-caution is the amber variant (mirrors the kit's .chip-warn). Both reuse
|
||||
the kit's status tokens so they stay on-palette. */
|
||||
.chip-alert { color: var(--bad); background: var(--bad-bg); border-color: var(--bad-border); }
|
||||
.chip-caution { color: var(--warn-ink); background: var(--warn-bg); border-color: var(--warn-border); }
|
||||
|
||||
@@ -1,379 +0,0 @@
|
||||
/* ============================================================================
|
||||
Technical-Light design system — portable theme layer
|
||||
----------------------------------------------------------------------------
|
||||
A refined technical-light aesthetic: warm-neutral paper, hairline rules,
|
||||
IBM Plex type, monospace tabular numerics, status carried by colour. Built
|
||||
to layer over Bootstrap 5 via --bs-* overrides, but every rule below works
|
||||
standalone — Bootstrap is optional.
|
||||
|
||||
HOW TO ADOPT
|
||||
1. Serve the three IBM Plex woff2 files (shipped in fonts/) and fix the
|
||||
@font-face url() paths below to wherever you serve them.
|
||||
2. Include this file once, globally. Add view-specific rules in a separate
|
||||
stylesheet — never edit the token block per-view.
|
||||
3. Status is colour, not iconography. Use the .s-* / .chip-* / .kv .v.*
|
||||
helpers; do not hand-pick hex values in feature CSS.
|
||||
========================================================================= */
|
||||
|
||||
/* ── Vendored fonts (embedded woff2, no network/CDN fetch) ───────────────────
|
||||
Adjust these url()s to your asset route. If you cannot vendor the fonts the
|
||||
--sans / --mono fallback stacks below degrade gracefully to system fonts. */
|
||||
@font-face {
|
||||
font-family: 'IBM Plex Sans';
|
||||
font-style: normal; font-weight: 400; font-display: swap;
|
||||
src: url('fonts/ibm-plex-sans-400.woff2') format('woff2');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'IBM Plex Sans';
|
||||
font-style: normal; font-weight: 600; font-display: swap;
|
||||
src: url('fonts/ibm-plex-sans-600.woff2') format('woff2');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'IBM Plex Mono';
|
||||
font-style: normal; font-weight: 500; font-display: swap;
|
||||
src: url('fonts/ibm-plex-mono-500.woff2') format('woff2');
|
||||
}
|
||||
|
||||
/* ── Design tokens ───────────────────────────────────────────────────────────
|
||||
The single source of truth. Re-theme by editing only this block. */
|
||||
:root {
|
||||
/* Surfaces & ink */
|
||||
--paper: #f4f4f1; /* page background — warm off-white, never pure */
|
||||
--card: #ffffff; /* raised surfaces: cards, bars, table heads */
|
||||
--ink: #1b1d21; /* primary text */
|
||||
--ink-soft: #5a6066; /* secondary text, labels */
|
||||
--ink-faint: #8b9097; /* tertiary text, captions, units */
|
||||
--rule: #e4e4df; /* hairline borders / row dividers */
|
||||
--rule-strong: #d2d2cb; /* emphasised hairlines: bar underline, pills */
|
||||
|
||||
/* Accent */
|
||||
--accent: #2f5fd0; /* links, sort arrows, primary actions */
|
||||
--accent-deep: #1e3f99; /* hover / pressed accent, raw-value emphasis */
|
||||
|
||||
/* Status — foreground */
|
||||
--ok: #2f9e44;
|
||||
--warn: #e8920c;
|
||||
--bad: #e03131;
|
||||
--idle: #868e96;
|
||||
|
||||
/* Status — tinted backgrounds (pair with the matching foreground) */
|
||||
--ok-bg: #e9f6ec;
|
||||
--warn-bg: #fdf1dd;
|
||||
--bad-bg: #fceaea;
|
||||
--idle-bg: #eef0f2;
|
||||
|
||||
/* Type stacks — Plex first, graceful system fallback */
|
||||
--mono: 'IBM Plex Mono', ui-monospace, 'Cascadia Mono', Consolas, monospace;
|
||||
--sans: 'IBM Plex Sans', system-ui, -apple-system, 'Segoe UI', sans-serif;
|
||||
|
||||
/* Bootstrap 5 overrides — harmless if Bootstrap is absent */
|
||||
--bs-body-bg: var(--paper);
|
||||
--bs-body-color: var(--ink);
|
||||
--bs-body-font-family: var(--sans);
|
||||
--bs-body-font-size: 0.9rem;
|
||||
--bs-primary: var(--accent);
|
||||
--bs-border-color: var(--rule);
|
||||
--bs-emphasis-color: var(--ink);
|
||||
}
|
||||
|
||||
/* ── Base ────────────────────────────────────────────────────────────────────
|
||||
The faint top-right radial is the one deliberate flourish — a soft sheen,
|
||||
not a gradient wash. Keep it subtle. */
|
||||
body {
|
||||
background:
|
||||
radial-gradient(1200px 480px at 88% -8%, #ffffff 0%, rgba(255,255,255,0) 70%),
|
||||
var(--paper);
|
||||
color: var(--ink);
|
||||
font-family: var(--sans);
|
||||
font-size: 0.9rem;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
|
||||
/* Any numeric / fixed-width text. Tabular figures so columns of digits align. */
|
||||
.numeric,
|
||||
.mono { font-family: var(--mono); font-variant-numeric: tabular-nums; }
|
||||
|
||||
a { color: var(--accent); text-decoration: none; }
|
||||
a:hover { color: var(--accent-deep); text-decoration: underline; }
|
||||
|
||||
/* ── App chrome: top bar ─────────────────────────────────────────────────────
|
||||
One bar across the top: brand, breadcrumb crumbs, a flex spacer, then meta
|
||||
text and any status pill pushed hard right. */
|
||||
.app-bar {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 1rem;
|
||||
padding: 0.85rem 1.25rem;
|
||||
background: var(--card);
|
||||
border-bottom: 1px solid var(--rule-strong);
|
||||
}
|
||||
.app-bar .brand {
|
||||
font-weight: 600;
|
||||
font-size: 1.05rem;
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.app-bar .brand .mark { color: var(--accent); } /* the one accent glyph */
|
||||
.app-bar .crumb { color: var(--ink-faint); font-size: 0.85rem; }
|
||||
.app-bar .spacer { flex: 1; } /* pushes meta/pill right */
|
||||
.app-bar .meta {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.78rem;
|
||||
color: var(--ink-soft);
|
||||
}
|
||||
|
||||
/* ── Connection / liveness pill ──────────────────────────────────────────────
|
||||
A rounded pill with a dot, driven entirely by data-state. Use for any
|
||||
live-link health indicator (websocket, SSE, polling). */
|
||||
.conn-pill {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.74rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.06em;
|
||||
padding: 0.2rem 0.6rem;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--rule-strong);
|
||||
color: var(--ink-soft);
|
||||
background: var(--card);
|
||||
}
|
||||
.conn-pill .dot {
|
||||
width: 7px; height: 7px; border-radius: 50%;
|
||||
background: var(--idle);
|
||||
}
|
||||
.conn-pill[data-state="connected"] { color: var(--ok); border-color: #bfe3c6; background: var(--ok-bg); }
|
||||
.conn-pill[data-state="connected"] .dot { background: var(--ok); }
|
||||
.conn-pill[data-state="connecting"] { color: var(--warn); border-color: #f0d9ab; background: var(--warn-bg); }
|
||||
.conn-pill[data-state="connecting"] .dot { background: var(--warn); animation: pulse 1.1s ease-in-out infinite; }
|
||||
.conn-pill[data-state="disconnected"] { color: var(--bad); border-color: #f0c0c0; background: var(--bad-bg); }
|
||||
.conn-pill[data-state="disconnected"] .dot { background: var(--bad); }
|
||||
|
||||
@keyframes pulse { 0%,100% { opacity: 1; } 50% { opacity: 0.25; } }
|
||||
|
||||
/* ── Status text helpers ─────────────────────────────────────────────────────
|
||||
Recolour a value in place — counts, ratios, error totals. */
|
||||
.s-ok { color: var(--ok); }
|
||||
.s-warn { color: var(--warn); }
|
||||
.s-bad { color: var(--bad); }
|
||||
.s-idle { color: var(--idle); }
|
||||
|
||||
/* ── State chip ──────────────────────────────────────────────────────────────
|
||||
Compact rectangular badge for an enumerated state (bound/recovering/…).
|
||||
Squarer than the pill; use the pill for liveness, the chip for state. */
|
||||
.chip {
|
||||
display: inline-block;
|
||||
font-size: 0.72rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
padding: 0.15rem 0.5rem;
|
||||
border-radius: 4px;
|
||||
border: 1px solid transparent;
|
||||
}
|
||||
.chip-ok { color: var(--ok); background: var(--ok-bg); border-color: #c6e6cd; }
|
||||
.chip-warn { color: #b56a00; background: var(--warn-bg); border-color: #efd6a6; }
|
||||
.chip-bad { color: var(--bad); background: var(--bad-bg); border-color: #eec3c3; }
|
||||
.chip-idle { color: var(--ink-soft); background: var(--idle-bg); border-color: var(--rule-strong); }
|
||||
|
||||
/* ── Panel — the base raised surface ─────────────────────────────────────────
|
||||
A white card with a hairline border and 8px radius. .panel-head is the
|
||||
uppercase eyebrow label that sits on top. */
|
||||
.panel {
|
||||
background: var(--card);
|
||||
border: 1px solid var(--rule);
|
||||
border-radius: 8px;
|
||||
}
|
||||
.panel-head {
|
||||
font-size: 0.74rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--ink-faint);
|
||||
padding: 0.6rem 0.9rem;
|
||||
border-bottom: 1px solid var(--rule);
|
||||
}
|
||||
|
||||
/* ── Page wrapper ────────────────────────────────────────────────────────────
|
||||
Centred, capped width, even gutter. */
|
||||
.page { padding: 1.25rem; max-width: 1680px; margin: 0 auto; }
|
||||
|
||||
/* ── Reveal-on-paint ─────────────────────────────────────────────────────────
|
||||
Add .rise to top-level sections; stagger with inline animation-delay
|
||||
(.02s, .08s, .14s …) so panels settle in sequence, not all at once. */
|
||||
@keyframes rise { from { opacity: 0; transform: translateY(6px); } to { opacity: 1; transform: none; } }
|
||||
.rise { animation: rise 0.4s ease both; }
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════════
|
||||
COMPONENT LIBRARY
|
||||
Generic, reusable pieces. View-specific layout belongs in a separate sheet.
|
||||
════════════════════════════════════════════════════════════════════════════ */
|
||||
|
||||
/* ── KPI / aggregate cards ───────────────────────────────────────────────────
|
||||
A responsive strip of headline numbers. .agg-card.alert / .caution tint the
|
||||
whole card when a watched metric goes non-zero. */
|
||||
.agg-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(6, 1fr);
|
||||
gap: 0.75rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
@media (max-width: 1100px) { .agg-grid { grid-template-columns: repeat(3, 1fr); } }
|
||||
@media (max-width: 620px) { .agg-grid { grid-template-columns: repeat(2, 1fr); } }
|
||||
|
||||
.agg-card {
|
||||
background: var(--card);
|
||||
border: 1px solid var(--rule);
|
||||
border-radius: 8px;
|
||||
padding: 0.7rem 0.9rem;
|
||||
}
|
||||
.agg-label {
|
||||
font-size: 0.68rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--ink-faint);
|
||||
}
|
||||
.agg-value {
|
||||
margin-top: 0.25rem;
|
||||
font-size: 1.5rem;
|
||||
font-weight: 600;
|
||||
line-height: 1.1;
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
.agg-sub { /* trailing "/ 54", "ms" etc. — quieter */
|
||||
font-size: 0.85rem;
|
||||
font-weight: 400;
|
||||
color: var(--ink-faint);
|
||||
}
|
||||
.agg-card.alert { border-color: #eec3c3; background: var(--bad-bg); }
|
||||
.agg-card.alert .agg-value { color: var(--bad); }
|
||||
.agg-card.caution { border-color: #efd6a6; background: var(--warn-bg); }
|
||||
.agg-card.caution .agg-value { color: #b56a00; }
|
||||
|
||||
/* ── Metric card + key/value rows ────────────────────────────────────────────
|
||||
A .panel-head over a stack of .kv rows: label left, monospace value right.
|
||||
Zebra striping on even rows. .v.warn / .v.bad / .v.ok recolour a value. */
|
||||
.card-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(290px, 1fr));
|
||||
gap: 0.85rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
.metric-card {
|
||||
background: var(--card);
|
||||
border: 1px solid var(--rule);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.metric-card .panel-head { margin: 0; }
|
||||
|
||||
.kv {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: baseline;
|
||||
gap: 1rem;
|
||||
padding: 0.32rem 0.9rem;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
.kv:nth-child(even) { background: #fbfbf9; }
|
||||
.kv .k { color: var(--ink-soft); }
|
||||
.kv .v {
|
||||
font-family: var(--mono);
|
||||
font-variant-numeric: tabular-nums;
|
||||
text-align: right;
|
||||
}
|
||||
.kv .v.warn { color: var(--warn); }
|
||||
.kv .v.bad { color: var(--bad); }
|
||||
.kv .v.ok { color: var(--ok); }
|
||||
|
||||
/* ── Toolbar ─────────────────────────────────────────────────────────────────
|
||||
Filter/search row that sits inside a .panel above a table. */
|
||||
.toolbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.6rem;
|
||||
padding: 0.6rem 0.9rem;
|
||||
border-bottom: 1px solid var(--rule);
|
||||
}
|
||||
.toolbar .spacer { flex: 1; }
|
||||
.tb-search { max-width: 280px; }
|
||||
.tb-state { max-width: 150px; }
|
||||
.tb-check {
|
||||
display: flex; align-items: center; gap: 0.35rem;
|
||||
font-size: 0.82rem; color: var(--ink-soft); white-space: nowrap;
|
||||
user-select: none;
|
||||
}
|
||||
.tb-count { font-family: var(--mono); font-size: 0.78rem; color: var(--ink-faint); }
|
||||
|
||||
/* ── Data table ──────────────────────────────────────────────────────────────
|
||||
Dense, hairline-ruled table. Uppercase sticky head on a faint fill; numeric
|
||||
columns get .num (right-aligned, monospace). Rows are clickable by default —
|
||||
drop the cursor/hover rules if yours are not. */
|
||||
.table-wrap { overflow-x: auto; }
|
||||
|
||||
.data-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
.data-table th,
|
||||
.data-table td {
|
||||
padding: 0.45rem 0.8rem;
|
||||
text-align: left;
|
||||
white-space: nowrap;
|
||||
border-bottom: 1px solid var(--rule);
|
||||
}
|
||||
.data-table th {
|
||||
font-size: 0.7rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: var(--ink-faint);
|
||||
background: #fbfbf9;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
}
|
||||
.data-table th.num,
|
||||
.data-table td.num { text-align: right; font-family: var(--mono); }
|
||||
|
||||
.data-table th.sortable { cursor: pointer; user-select: none; }
|
||||
.data-table th.sortable:hover { color: var(--ink); }
|
||||
.data-table th.sorted-asc::after { content: ' \2191'; color: var(--accent); }
|
||||
.data-table th.sorted-desc::after { content: ' \2193'; color: var(--accent); }
|
||||
|
||||
.data-table tbody tr { cursor: pointer; transition: background 0.08s; }
|
||||
.data-table tbody tr:hover { background: #f3f6fd; }
|
||||
.data-table tbody tr:last-child td { border-bottom: none; }
|
||||
|
||||
.empty-row {
|
||||
text-align: center !important;
|
||||
color: var(--ink-faint);
|
||||
padding: 1.6rem !important;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* ── Direction / category tag ────────────────────────────────────────────────
|
||||
Tiny inline tag for a per-row category (e.g. read vs write). */
|
||||
.dir-tag {
|
||||
font-size: 0.68rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
padding: 0.1rem 0.4rem;
|
||||
border-radius: 3px;
|
||||
}
|
||||
.dir-read { color: var(--accent-deep); background: #e7ecfb; }
|
||||
.dir-write { color: #8a5a00; background: var(--warn-bg); }
|
||||
|
||||
/* ── Inline notice ───────────────────────────────────────────────────────────
|
||||
A .panel with a warning tint — for "this thing is gone / degraded" banners. */
|
||||
.notice {
|
||||
padding: 0.85rem 1.1rem;
|
||||
margin-bottom: 1rem;
|
||||
color: #b56a00;
|
||||
background: var(--warn-bg);
|
||||
border-color: #efd6a6;
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,19 +0,0 @@
|
||||
// Sidebar nav collapse state — persisted in the `otopcua_nav` cookie so it
|
||||
// survives full page reloads and reconnects. Invoked from MainLayout.razor via
|
||||
// JS interop (window.navState.get / .set). Mirrors the ScadaLink pattern at
|
||||
// /Users/dohertj2/Desktop/scadalink-design/src/ScadaLink.CentralUI/wwwroot/js/nav-state.js.
|
||||
window.navState = {
    // Reads the `otopcua_nav` cookie and returns its URI-decoded value (the
    // comma-separated expanded section ids). Returns "" when the cookie is not set.
    get() {
        const found = /(?:^|;\s*)otopcua_nav=([^;]*)/.exec(document.cookie);
        if (found === null) {
            return "";
        }
        return decodeURIComponent(found[1]);
    },

    // Stores `value` (URI-encoded) in the `otopcua_nav` cookie for the whole site
    // (path=/) with a one-year max-age and SameSite=Lax. The cookie is written from
    // script, so it cannot be HttpOnly.
    set(value) {
        const maxAgeSeconds = 60 * 60 * 24 * 365;
        const attributes = ";path=/;max-age=" + maxAgeSeconds + ";samesite=lax";
        document.cookie = "otopcua_nav=" + encodeURIComponent(value) + attributes;
    }
};
|
||||
@@ -0,0 +1,44 @@
|
||||
using ZB.MOM.WW.Audit;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Maps OtOpcUa's audit <c>Action</c> vocabulary onto the canonical
|
||||
/// <see cref="AuditOutcome"/>. The vocabulary is the set of values documented on
|
||||
/// <c>ConfigAuditLog.EventType</c>: config verbs are <see cref="AuditOutcome.Success"/>,
|
||||
/// the two authorization-rejection events are <see cref="AuditOutcome.Denied"/>. OtOpcUa
|
||||
/// emits no <see cref="AuditOutcome.Failure"/> events today.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Pure function — no live emit sites construct an <see cref="AuditEvent"/> in production
|
||||
/// (the structured audit path is dormant; all live audit flows through the bespoke stored
|
||||
/// procedure path). This helper exists so that when the structured path is wired up, the
|
||||
/// required <c>Outcome</c> field is derived consistently from the action verb. Tested, not
|
||||
/// yet exercised in production.
|
||||
/// </remarks>
|
||||
public static class AuditOutcomeMapper
{
    /// <summary>
    /// Resolves an OtOpcUa audit action verb to its canonical <see cref="AuditOutcome"/>.
    /// </summary>
    /// <remarks>
    /// Only the two authorization-rejection verbs, <c>OpcUaAccessDenied</c> and
    /// <c>CrossClusterNamespaceAttempt</c>, yield <see cref="AuditOutcome.Denied"/>.
    /// The known config verbs (<c>DraftCreated</c>, <c>DraftEdited</c>, <c>Published</c>,
    /// <c>RolledBack</c>, <c>NodeApplied</c>, <c>ClusterCreated</c>, <c>NodeAdded</c>,
    /// <c>CredentialAdded</c>, <c>CredentialDisabled</c>, <c>ExternalIdReleased</c>) and
    /// every unrecognised verb yield <see cref="AuditOutcome.Success"/>.
    /// </remarks>
    /// <param name="action">The audit action verb (e.g. <c>DraftCreated</c>, <c>OpcUaAccessDenied</c>).</param>
    /// <returns>The mapped outcome.</returns>
    public static AuditOutcome FromAction(string action)
    {
        if (action is "OpcUaAccessDenied" or "CrossClusterNamespaceAttempt")
        {
            return AuditOutcome.Denied;
        }

        // Everything else — listed config verbs and unknown verbs alike — is a success.
        return AuditOutcome.Success;
    }
}
|
||||
@@ -1,7 +1,7 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Event;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Audit;
|
||||
using ZB.MOM.WW.Audit;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
@@ -19,8 +19,13 @@ namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Audit;
|
||||
/// <c>UX_ConfigAuditLog_EventId</c> (cross-restart safety — a retry of an already-flushed
|
||||
/// batch hits the constraint and we drop the duplicate insert without losing the rest of
|
||||
/// the batch).
|
||||
///
|
||||
/// Implements the shared <see cref="IAuditWriter"/> seam: <see cref="WriteAsync"/> is a
|
||||
/// best-effort, never-throwing entry point that simply <c>Tell</c>s this actor and returns
|
||||
/// a completed task, so non-Akka callers can emit canonical audit events through the same
|
||||
/// batching/dedup pipeline as in-cluster <c>Tell</c> traffic.
|
||||
/// </summary>
|
||||
public sealed class AuditWriterActor : ReceiveActor, IWithTimers
|
||||
public sealed class AuditWriterActor : ReceiveActor, IWithTimers, IAuditWriter
|
||||
{
|
||||
public const int FlushBatchSize = 500;
|
||||
public static readonly TimeSpan FlushInterval = TimeSpan.FromSeconds(5);
|
||||
@@ -52,6 +57,23 @@ public sealed class AuditWriterActor : ReceiveActor, IWithTimers
|
||||
Timers.StartPeriodicTimer("flush", Flush.Instance, FlushInterval);
|
||||
}
|
||||
|
||||
// Reference to this actor, captured while the actor is being constructed (when the
// actor context is active). ActorBase.Self resolves through the thread-bound actor
// context, so reading it from a non-actor thread throws, and reading it from another
// actor's thread would resolve that actor instead. WriteAsync is documented as callable
// from any thread, so it must use this captured reference rather than Self.
private readonly IActorRef _selfRef = Context.Self;

/// <summary>
/// <see cref="IAuditWriter"/> seam. Best-effort: routes the event onto this actor's mailbox
/// via <c>Tell</c> so it flows through the same batching + dedup pipeline as in-cluster
/// traffic, then returns immediately. The actual persistence happens asynchronously on the
/// next flush; a write failure there is logged and the batch dropped (per the best-effort
/// audit contract).
/// </summary>
/// <param name="evt">The canonical audit event to persist.</param>
/// <param name="ct">Unused — enqueue is synchronous and non-blocking.</param>
/// <returns>A completed task.</returns>
public Task WriteAsync(AuditEvent evt, CancellationToken ct = default)
{
    // Tell on a captured IActorRef is safe from any thread; it does not depend on the
    // caller having an active actor context.
    _selfRef.Tell(evt);
    return Task.CompletedTask;
}
|
||||
|
||||
private void HandleEvent(AuditEvent evt)
|
||||
{
|
||||
// In-buffer dedup. Last write wins on duplicate EventId within the batch — events
|
||||
@@ -74,13 +96,14 @@ public sealed class AuditWriterActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
db.ConfigAuditLogs.Add(new ConfigAuditLog
|
||||
{
|
||||
Timestamp = evt.OccurredAtUtc,
|
||||
Timestamp = evt.OccurredAtUtc.UtcDateTime,
|
||||
Principal = evt.Actor,
|
||||
EventType = $"{evt.Category}:{evt.Action}",
|
||||
NodeId = evt.SourceNode.Value,
|
||||
NodeId = evt.SourceNode,
|
||||
DetailsJson = evt.DetailsJson,
|
||||
EventId = evt.EventId,
|
||||
CorrelationId = evt.CorrelationId.Value,
|
||||
CorrelationId = evt.CorrelationId,
|
||||
Outcome = evt.Outcome.ToString(),
|
||||
});
|
||||
}
|
||||
db.SaveChanges();
|
||||
|
||||
@@ -30,10 +30,15 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
private readonly TimeSpan _applyDeadline;
|
||||
private readonly ILoggingAdapter _log = Context.GetLogger();
|
||||
private readonly Dictionary<NodeId, ApplyAckOutcome> _acks = new();
|
||||
// NodeId equality here is case-insensitive (by Value) to match the case-insensitive ClusterId/
|
||||
// NodeId scoping in DeploymentArtifact.ResolveClusterScope — so an ack from a node whose address
|
||||
// differs only in case still matches its expected-ack entry (SQL collation + DNS are
|
||||
// case-insensitive, so the same node can surface with different casing).
|
||||
private static readonly IEqualityComparer<NodeId> NodeIdComparer = new CaseInsensitiveNodeIdComparer();
|
||||
private readonly Dictionary<NodeId, ApplyAckOutcome> _acks = new(NodeIdComparer);
|
||||
|
||||
private DeploymentId? _current;
|
||||
private HashSet<NodeId> _expectedAcks = new();
|
||||
private HashSet<NodeId> _expectedAcks = new(NodeIdComparer);
|
||||
|
||||
/// <summary>Gets the timer scheduler for managing apply deadlines.</summary>
|
||||
public ITimerScheduler Timers { get; set; } = null!;
|
||||
@@ -88,7 +93,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
.AsNoTracking()
|
||||
.ToList();
|
||||
|
||||
_expectedAcks = nodeStates.Select(s => NodeId.Parse(s.NodeId)).ToHashSet();
|
||||
_expectedAcks = nodeStates.Select(s => NodeId.Parse(s.NodeId)).ToHashSet(NodeIdComparer);
|
||||
foreach (var s in nodeStates.Where(s => s.Status != NodeDeploymentStatus.Applying))
|
||||
_acks[NodeId.Parse(s.NodeId)] = s.Status == NodeDeploymentStatus.Applied
|
||||
? ApplyAckOutcome.Applied
|
||||
@@ -248,7 +253,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
private HashSet<NodeId> DiscoverDriverNodes()
|
||||
{
|
||||
var cluster = Akka.Cluster.Cluster.Get(Context.System);
|
||||
var nodes = new HashSet<NodeId>();
|
||||
var nodes = new HashSet<NodeId>(NodeIdComparer);
|
||||
foreach (var member in cluster.State.Members)
|
||||
{
|
||||
if (member.Status is not (MemberStatus.Up or MemberStatus.Joining)) continue;
|
||||
@@ -261,4 +266,18 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares <see cref="NodeId"/> instances by <see cref="NodeId.Value"/> using ordinal,
/// case-insensitive string comparison, so the expected-ack set and incoming acks agree
/// regardless of host-name casing (matching the case-insensitive scoping in
/// <c>DeploymentArtifact.ResolveClusterScope</c>).
/// </summary>
private sealed class CaseInsensitiveNodeIdComparer : IEqualityComparer<NodeId>
{
    // Single comparer used for both equality and hashing so the two always agree.
    private static readonly StringComparer ValueComparer = StringComparer.OrdinalIgnoreCase;

    /// <inheritdoc />
    public bool Equals(NodeId x, NodeId y)
    {
        return ValueComparer.Equals(x.Value, y.Value);
    }

    /// <inheritdoc />
    public int GetHashCode(NodeId obj)
    {
        // A missing value hashes like the empty string.
        var value = obj.Value ?? string.Empty;
        return ValueComparer.GetHashCode(value);
    }
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
<PackageReference Include="Akka.Cluster.Hosting"/>
|
||||
<PackageReference Include="Akka.Cluster.Tools"/>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore"/>
|
||||
<PackageReference Include="ZB.MOM.WW.Audit"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
using ZB.MOM.WW.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Security.Ldap;
|
||||
using LdapTransport = ZB.MOM.WW.Auth.Abstractions.Ldap.LdapTransport;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Configuration;
|
||||
|
||||
/// <summary>
/// Startup validator for <see cref="LdapOptions"/>, derived from the shared
/// <c>ZB.MOM.WW.Configuration</c> <see cref="OptionsValidatorBase{TOptions}"/>. With LDAP login
/// enabled and <c>DevStubMode</c> off, <c>Server</c> and <c>SearchBase</c> must be non-blank and
/// <c>Port</c> must pass the builder's port check; otherwise every check is skipped.
/// <c>ServiceAccountDn</c>/<c>Password</c> are deliberately not required — an empty pair selects
/// the direct-bind path (see <see cref="LdapOptions.ServiceAccountDn"/>). Messages use
/// <c>"Ldap:"</c> as a human-readable field prefix; the actual bound section path is
/// <c>Security:Ldap</c> (see <see cref="LdapOptions.SectionName"/>).
/// </summary>
/// <remarks>
/// Insecure-transport guard: a real-LDAP configuration that selects plaintext transport
/// (<see cref="LdapTransport.None"/>) without <see cref="LdapOptions.AllowInsecure"/> fails
/// validation. This mirrors the login-time fail-closed guard in
/// <see cref="OtOpcUaLdapAuthService"/> and is gated on the same conditions
/// (<see cref="LdapOptions.Enabled"/> AND not <see cref="LdapOptions.DevStubMode"/>), so a
/// disabled or dev-stub configuration is exempt. The login-time guard stays in place as
/// defence in depth.
/// </remarks>
public sealed class LdapOptionsValidator : OptionsValidatorBase<LdapOptions>
{
    /// <inheritdoc />
    protected override void Validate(ValidationBuilder builder, LdapOptions options)
    {
        // The real-LDAP checks only apply when a real bind will actually happen. A disabled
        // login or an active dev stub never touches Server/SearchBase/Port, so requiring them
        // would only force placeholder values into dev configs.
        var realBindActive = options.Enabled && !options.DevStubMode;
        if (!realBindActive)
        {
            return;
        }

        var hasServer = !string.IsNullOrWhiteSpace(options.Server);
        builder.RequireThat(hasServer, "Ldap:Server is required when LDAP login is enabled.");

        var hasSearchBase = !string.IsNullOrWhiteSpace(options.SearchBase);
        builder.RequireThat(hasSearchBase, "Ldap:SearchBase is required when LDAP login is enabled.");

        builder.Port(options.Port, "Ldap:Port");

        // Plaintext transport is acceptable only with an explicit opt-in. Checking this at
        // startup (as well as at login) makes an accidentally insecure overlay refuse to boot
        // instead of failing every bind later.
        var transportAcceptable = options.Transport != LdapTransport.None || options.AllowInsecure;
        builder.RequireThat(
            transportAcceptable,
            "LDAP transport is None (plaintext) but AllowInsecure is false — set Transport to Ldaps/StartTls or set AllowInsecure for dev.");
    }
}
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
using ZB.MOM.WW.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Configuration;
|
||||
|
||||
/// <summary>
/// Startup validator for <see cref="OpcUaApplicationHostOptions"/>, derived from the shared
/// <c>ZB.MOM.WW.Configuration</c> <see cref="OptionsValidatorBase{TOptions}"/>. Per the original
/// authoring notes the C# defaults are all valid, so a host without an explicit <c>"OpcUa"</c>
/// section passes; the validator is there to reject bad explicit overrides before the OPC UA
/// SDK boots. It checks <c>ApplicationName</c>, <c>ApplicationUri</c>, <c>PublicHostname</c>,
/// <c>PkiStoreRoot</c> and <c>OpcUaPort</c>, and requires at least one enabled security profile.
/// <c>ApplicationConfigPath</c>, <c>PeerApplicationUris</c>,
/// <c>AutoAcceptUntrustedClientCertificates</c> and <c>ProductUri</c> are intentionally left
/// unvalidated. Messages carry the <c>"OpcUa:"</c> prefix of the bound configuration section.
/// </summary>
public sealed class OpcUaApplicationHostOptionsValidator : OptionsValidatorBase<OpcUaApplicationHostOptions>
{
    /// <inheritdoc />
    protected override void Validate(ValidationBuilder builder, OpcUaApplicationHostOptions o)
    {
        // Identity essentials.
        builder.Required(o.ApplicationName, "OpcUa:ApplicationName");
        builder.Required(o.ApplicationUri, "OpcUa:ApplicationUri");
        builder.Required(o.PublicHostname, "OpcUa:PublicHostname");
        builder.Required(o.PkiStoreRoot, "OpcUa:PkiStoreRoot");

        // Transport.
        builder.Port(o.OpcUaPort, "OpcUa:OpcUaPort");

        // EnabledSecurityProfiles is an IList<T>, which does not derive from
        // IReadOnlyCollection<T> and so cannot be passed to MinCount directly. Copying it into
        // a List<T> bridges the two while keeping the element count unchanged.
        var enabledProfiles = o.EnabledSecurityProfiles?.ToList();
        builder.MinCount(enabledProfiles, 1, "OpcUa:EnabledSecurityProfiles");
    }
}
|
||||
@@ -1,39 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that is Healthy on the admin-role leader (and on nodes without the admin
/// role) and Degraded on an admin member that is not the leader. Per the original notes it
/// backs the <c>/health/active</c> endpoint so external load balancers can route
/// admin-singleton traffic to the current leader.
/// </summary>
public sealed class AdminRoleLeaderHealthCheck : IHealthCheck
{
    private const string AdminRole = "admin";

    private readonly IClusterRoleInfo _roleInfo;

    /// <summary>Creates the check over the supplied cluster role information.</summary>
    /// <param name="roleInfo">The cluster role information provider.</param>
    public AdminRoleLeaderHealthCheck(IClusterRoleInfo roleInfo) => _roleInfo = roleInfo;

    /// <summary>Reports whether this node is the current admin-role leader.</summary>
    /// <param name="context">The health check context (not used).</param>
    /// <param name="cancellationToken">The cancellation token (not used; the check is synchronous).</param>
    /// <returns>An already-completed task holding the result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
        => Task.FromResult(Evaluate());

    // Computes the result synchronously from the role provider.
    private HealthCheckResult Evaluate()
    {
        // Nodes outside the admin role have nothing to lead; report them healthy.
        if (!_roleInfo.HasRole(AdminRole))
        {
            return HealthCheckResult.Healthy("Node does not carry admin role");
        }

        var leader = _roleInfo.RoleLeader(AdminRole);
        if (leader is not null && leader.Value.Equals(_roleInfo.LocalNode))
        {
            return HealthCheckResult.Healthy($"Admin leader ({_roleInfo.LocalNode})");
        }

        return HealthCheckResult.Degraded($"Admin member but not leader (leader={leader?.Value ?? "<unknown>"})");
    }
}
|
||||
@@ -1,35 +0,0 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that is Healthy when this node appears in the Akka cluster member list with
/// status <see cref="MemberStatus.Up"/>, and Degraded otherwise.
/// </summary>
public sealed class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly ActorSystem _system;

    /// <summary>Creates the check for the given actor system.</summary>
    /// <param name="system">The Akka actor system to check cluster health for.</param>
    public AkkaClusterHealthCheck(ActorSystem system) => _system = system;

    /// <summary>Reports whether the local node is Up in the cluster.</summary>
    /// <param name="context">The health check context (not used).</param>
    /// <param name="cancellationToken">Cancellation token (not used; the check is synchronous).</param>
    /// <returns>An already-completed task holding the result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        var cluster = Akka.Cluster.Cluster.Get(_system);

        // Look for our own address among the members with status Up.
        var selfUp = false;
        foreach (var member in cluster.State.Members)
        {
            if (member.Address == cluster.SelfAddress && member.Status == MemberStatus.Up)
            {
                selfUp = true;
                break;
            }
        }

        if (!selfUp)
        {
            return Task.FromResult(HealthCheckResult.Degraded("Self not yet Up in cluster"));
        }

        return Task.FromResult(HealthCheckResult.Healthy($"Self Up; {cluster.State.Members.Count} member(s)"));
    }
}
|
||||
@@ -1,38 +0,0 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that probes the configuration database by reading at most one row from
/// <c>Deployments</c>. Healthy when the query completes; Unhealthy (carrying the exception)
/// when anything in the probe throws.
/// </summary>
public sealed class DatabaseHealthCheck : IHealthCheck
{
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseHealthCheck"/> class.
    /// </summary>
    /// <param name="dbFactory">The database context factory for the config database.</param>
    public DatabaseHealthCheck(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory) => _dbFactory = dbFactory;

    /// <summary>
    /// Runs the probe query against the configuration database.
    /// </summary>
    /// <param name="context">The health check context (not used).</param>
    /// <param name="cancellationToken">Cancellation token passed to context creation and the query.</param>
    /// <returns>Healthy when the probe query completes; Unhealthy when it throws.</returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            var configDb = await _dbFactory.CreateDbContextAsync(cancellationToken);
            await using (configDb)
            {
                // Smallest possible read: one untracked row is enough to prove connectivity.
                await configDb.Deployments.AsNoTracking().Take(1).ToListAsync(cancellationToken);
            }

            return HealthCheckResult.Healthy("ConfigDb reachable");
        }
        catch (Exception ex)
        {
            return HealthCheckResult.Unhealthy("ConfigDb unreachable", ex);
        }
    }
}
|
||||
@@ -1,25 +1,40 @@
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public static class HealthEndpoints
|
||||
{
|
||||
/// <summary>
|
||||
/// Registers the standard ASP.NET Core health-check infrastructure plus the OtOpcUa-specific
|
||||
/// probes. Mirrors ScadaLink's three-tier pattern: <c>ready</c> = boot ok; <c>active</c> =
|
||||
/// fully serving traffic; <c>healthz</c> = bare process liveness.
|
||||
/// Registers the shared ZB.MOM.WW health probes. Tier semantics preserved: configdb + akka on
|
||||
/// ready+active; admin-leader on active only.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection to register health checks with.</param>
|
||||
public static IServiceCollection AddOtOpcUaHealth(this IServiceCollection services)
|
||||
{
|
||||
services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("configdb", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AkkaClusterHealthCheck>("akka", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AdminRoleLeaderHealthCheck>("admin-leader", tags: new[] { "active" });
|
||||
.AddTypeActivatedCheck<DatabaseHealthCheck<OtOpcUaConfigDbContext>>(
|
||||
"configdb",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: new DatabaseHealthCheckOptions<OtOpcUaConfigDbContext>
|
||||
{
|
||||
ProbeQuery = static (db, ct) => db.Deployments.AsNoTracking().Take(1).ToListAsync(ct),
|
||||
})
|
||||
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
|
||||
"akka",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: AkkaClusterStatusPolicy.OtOpcUaCompat)
|
||||
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
|
||||
"admin-leader",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Active },
|
||||
args: "admin");
|
||||
return services;
|
||||
}
|
||||
|
||||
@@ -27,21 +42,7 @@ public static class HealthEndpoints
|
||||
/// <param name="app">The endpoint route builder.</param>
|
||||
public static IEndpointRouteBuilder MapOtOpcUaHealth(this IEndpointRouteBuilder app)
|
||||
{
|
||||
// AllowAnonymous on all three — Traefik / k8s liveness probes / load-balancers
|
||||
// hit these without credentials. Without it the AddOtOpcUaAuth fallback policy
|
||||
// 401s every probe and Traefik marks every backend unhealthy.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("ready"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/health/active", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("active"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/healthz", new HealthCheckOptions
|
||||
{
|
||||
Predicate = _ => false, // process-liveness only — no probes run.
|
||||
}).AllowAnonymous();
|
||||
app.MapZbHealth();
|
||||
return app;
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user