Compare commits
300 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 61193629b6 | |||
| e3a27422a1 | |||
| 32d7fd7cc9 | |||
| de666b24c3 | |||
| a4fb97aef8 | |||
| da4634d67e | |||
| 869be660fd | |||
| a8916c3e08 | |||
| 79b2345834 | |||
| 4df5b849ac | |||
| a58151e99e | |||
| 1fd093d95d | |||
| f210f09caf | |||
| 042f3b6a65 | |||
| bc40388914 | |||
| b719194046 | |||
| 7570df76d3 | |||
| 244949caa3 | |||
| a5a0d06dbe | |||
| 6882761f4c | |||
| 15f3797f1e | |||
| 534d670b21 | |||
| b351a81c8f | |||
| f655efc570 | |||
| c4116e54c9 | |||
| c3fec1426c | |||
| a2761e4b98 | |||
| 4a469fbe06 | |||
| e2fa6754bb | |||
| b76561a780 | |||
| c49fccbe0c | |||
| 5622e51006 | |||
| 9e479ce675 | |||
| af691f3291 | |||
| 453340e71e | |||
| b64d670303 | |||
| c83e9397e6 | |||
| 74b9218a92 | |||
| 532e9933f3 | |||
| ee8add4416 | |||
| bc4fce5fbe | |||
| 7a0b8525a9 | |||
| 560b327ee1 | |||
| d1b6cff085 | |||
| ef17d2e595 | |||
| e439100937 | |||
| 7c9621040e | |||
| 1b0baf7025 | |||
| f31af0093f | |||
| 6e365ef1a9 | |||
| 1dbd3b2a6d | |||
| 48c3c56073 | |||
| 5475ab2aa3 | |||
| 1a143beeb9 | |||
| 641b2ecbcf | |||
| 09d1bbac00 | |||
| b869af2b3d | |||
| 56be42913c | |||
| dc8a2dd52c | |||
| d605d0b20d | |||
| 85676db3a5 | |||
| bec2988309 | |||
| 7cd5cde315 | |||
| 7c92297d0e | |||
| 81f09a7054 | |||
| c962b86bde | |||
| fcd0b9b355 | |||
| 0d3ec46c14 | |||
| 662f3f9f5c | |||
| dcd2509548 | |||
| 64e4726fff | |||
| 494da22cd1 | |||
| 063005fefa | |||
| ffcc8d1065 | |||
| 4b374fd177 | |||
| 54f0dbddb9 | |||
| c19d124e89 | |||
| f3f328c25c | |||
| 4584612a1a | |||
| 4203b84d51 | |||
| 29370fde3c | |||
| 3f23a1acd3 | |||
| 4d5c6ac892 | |||
| c4086c243c | |||
| a971db3ee5 | |||
| 5f8fa7004c | |||
| 059a6218f7 | |||
| 8149739161 | |||
| 2c16062457 | |||
| dc21cbad53 | |||
| dfbf6793de | |||
| a243cfd126 | |||
| 5cad9b260e | |||
| a3073d16bf | |||
| efcc2311e6 | |||
| 7014c9376c | |||
| 27b3a014da | |||
| 55e8bf70d9 | |||
| c0ce5d02bd | |||
| a28f4cdd25 | |||
| a008530af6 | |||
| 1ff3875a19 | |||
| 85af126406 | |||
| f2f6eeb74e | |||
| 8c0a32025d | |||
| 5ffbc42d8c | |||
| 5f0e0482ed | |||
| d892ab9e12 | |||
| 9f62f2c242 | |||
| a88721ce31 | |||
| 4902295211 | |||
| b474d63335 | |||
| 5058a56645 | |||
| dc12c3732e | |||
| c1c68c9134 | |||
| af06648558 | |||
| 64e3fbe035 | |||
| f9fc7dd2e1 | |||
| 7dfbca6469 | |||
| 44b8a9c7ff | |||
| 60beb9128e | |||
| 6884de9774 | |||
| c064ec16cf | |||
| ed1c17bc7b | |||
| 1e64488c0d | |||
| f02071c9a2 | |||
| 993e012e55 | |||
| 961e09430a | |||
| a1a7646b33 | |||
| e4d0d82f7f | |||
| 2915755a7c | |||
| a5c6ce279e | |||
| 59b3d9f295 | |||
| 89095c15e3 | |||
| bdae749b2b | |||
| e8c4f18607 | |||
| cb936db7d6 | |||
| a5412c16a3 | |||
| dce2528c68 | |||
| 83eda9e826 | |||
| 70ffd2849d | |||
| 898a47746d | |||
| 25ce111981 | |||
| 7209bc99e2 | |||
| 2c49f18442 | |||
| 05a0596fb1 | |||
| 219d10a22d | |||
| 607dc51dec | |||
| 9d86287d08 | |||
| 2697af31d1 | |||
| 52997ee164 | |||
| 21eac21409 | |||
| 8b08566f41 | |||
| 50787823d3 | |||
| 7e22e2250c | |||
| d21f6947e1 | |||
| 7fa863f6da | |||
| f427dc4f26 | |||
| 3e3f7588bd | |||
| c02f016f1d | |||
| a1325299ce | |||
| 14fb2b05ed | |||
| da141497f8 | |||
| 9892ceae9a | |||
| 59858129cb | |||
| e248e037e7 | |||
| ae980aef5d | |||
| 2662ac08e4 | |||
| 45740578c9 | |||
| 5ae67a48ba | |||
| d055cb059e | |||
| 74161f9460 | |||
| 396052a126 | |||
| fd0cc4dfdb | |||
| 850d6774ea | |||
| 5c754ecffd | |||
| 68c6f36cfe | |||
| 36c4751571 | |||
| 229282ad8b | |||
| b0a2bb037d | |||
| ba6e5dd7f9 | |||
| 686138123f | |||
| cd5540cb1a | |||
| 4e6ef648d1 | |||
| f18c285cca | |||
| 7a6b016d9e | |||
| 8f32b89fb9 | |||
| 337a691629 | |||
| b06e3ae740 | |||
| f57f61deac | |||
| 8e5c8e29f7 | |||
| 253fb60459 | |||
| 8ac71db464 | |||
| 7e3b56c27d | |||
| e40615dad5 | |||
| 1689901c0e | |||
| 3c3fef911c | |||
| a8becc9c46 | |||
| 5cfbe8b5dd | |||
| 62e3cd6599 | |||
| d6fac2d81d | |||
| bb353c4d43 | |||
| 45a8c79ffe | |||
| b266f63cd7 | |||
| dfc143cdeb | |||
| 463512d1d8 | |||
| 09d6676e1f | |||
| 698709a578 | |||
| 76310b8829 | |||
| 2b75ce3876 | |||
| 8b4de8080b | |||
| fa1d685ccd | |||
| e2b357f89a | |||
| eb4280b7eb | |||
| 8a1f97b27f | |||
| f167808a2c | |||
| b83f099394 | |||
| f022499e7f | |||
| 26d8f2f620 | |||
| 1a067e609c | |||
| 5e31449529 | |||
| b7c117ab31 | |||
| 2877a883cd | |||
| 2e4f1399bb | |||
| e31547d00e | |||
| 28639cb14d | |||
| e115f13104 | |||
| 95ef533822 | |||
| 39729bfe21 | |||
| 64c627f8d6 | |||
| ed130135ca | |||
| ea6f972e96 | |||
| 52bf4b3371 | |||
| dd122c4ca9 | |||
| f193872891 | |||
| bad2aef137 | |||
| 6b37f997ad | |||
| 62e12dab95 | |||
| ef683f5073 | |||
| 9f61cd5989 | |||
| 9582e448d5 | |||
| 1955bc5f4d | |||
| 23f669c376 | |||
| 14acab5a58 | |||
| 32574b3e4e | |||
| fc22d4f7b6 | |||
| 973a3d1b9a | |||
| 38ea0c5086 | |||
| e38f22e3c2 | |||
| 8be84ba27b | |||
| 207fc6aba9 | |||
| 93316e3431 | |||
| 567b8cac1d | |||
| f35925b57e | |||
| e0b6d5680b | |||
| c217c49f69 | |||
| dfb06368cd | |||
| f184f8ed1b | |||
| 3d0f4dc168 | |||
| fdb4ac7051 | |||
| 136234e7f2 | |||
| 5d3a5a40d7 | |||
| fee4a8c008 | |||
| c168c1c9c6 | |||
| 605dbf3dcc | |||
| e00f46d723 | |||
| 3c915e652e | |||
| 1ddf8bb50e | |||
| 13d3aeab09 | |||
| 4bb4ad8acb | |||
| 990ce343fe | |||
| 8e2c4f2835 | |||
| 30a2104fa5 | |||
| 2b811477d1 | |||
| fac32ad69b | |||
| ef4a70751c | |||
| 866dc03fac | |||
| c6082aa0b9 | |||
| b1f3e09661 | |||
| 49644fc7fd | |||
| 3d982d9a65 | |||
| 23d59d73f2 | |||
| c2abbf45bd | |||
| 3a53d03d23 | |||
| fb7c6c7046 | |||
| a6ae4e22d1 | |||
| 41e62b2663 | |||
| a9be80923c | |||
| 994997ba7b | |||
| 0001cdd579 | |||
| 7b6ab2ec6f | |||
| 5a9c4591b9 | |||
| 0f8ce1cb80 | |||
| 1b10194634 | |||
| 59ecd18169 | |||
| 2a6ac07111 | |||
| 7fe9f16cf8 | |||
| 879925180b | |||
| 3ca569f621 | |||
| 6923be3aa2 |
@@ -0,0 +1,76 @@
|
||||
# CI for the v2 branch — runs on every push + PR to the v2-akka-fuse / master
|
||||
# branches. Layered into three jobs:
|
||||
# build dotnet restore + build (fast feedback on compile errors)
|
||||
# unit-tests every v2 unit-test project
|
||||
# integration 2-node Host.IntegrationTests harness
|
||||
#
|
||||
# Skips E2E (Category=E2E) — that runs nightly via v2-e2e.yml against the full
|
||||
# four-node docker-dev stack.
|
||||
#
|
||||
# Compatible with both GitHub Actions and Gitea Actions (act_runner). The .NET 10
|
||||
# SDK is pinned via global.json at the repo root; if no global.json exists, the
|
||||
# setup-dotnet step falls back to dotnet-version below.
|
||||
|
||||
name: v2-ci
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [v2-akka-fuse, master]
|
||||
pull_request:
|
||||
branches: [v2-akka-fuse, master]
|
||||
workflow_dispatch: {}
|
||||
|
||||
env:
|
||||
DOTNET_NOLOGO: "1"
|
||||
DOTNET_CLI_TELEMETRY_OPTOUT: "1"
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet restore
|
||||
run: dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
- name: dotnet build
|
||||
run: dotnet build ZB.MOM.WW.OtOpcUa.slnx --no-restore --configuration Release
|
||||
|
||||
unit-tests:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
|
||||
integration:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
@@ -0,0 +1,57 @@
|
||||
# Nightly E2E job. Runs against the docker-dev four-node fleet (admin-a +
|
||||
# admin-b + driver-a + driver-b + SQL + LDAP + Traefik). Trigger:
|
||||
# - cron at 03:00 UTC daily
|
||||
# - workflow_dispatch from the Actions UI for on-demand runs
|
||||
#
|
||||
# The E2E test project (tests/Server/ZB.MOM.WW.OtOpcUa.E2ETests) does not yet
|
||||
# exist — it lands when the F-series follow-ups F10/F11/F12 wire enough of the
|
||||
# SDK/historian/probe so an end-to-end driver round-trip is meaningful. Until
|
||||
# then this workflow is a green no-op (the `--filter Category=E2E` matches
|
||||
# zero tests, and `dotnet test` returns 0).
|
||||
|
||||
name: v2-e2e
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 3 * * *"
|
||||
workflow_dispatch: {}
|
||||
|
||||
env:
|
||||
DOTNET_NOLOGO: "1"
|
||||
DOTNET_CLI_TELEMETRY_OPTOUT: "1"
|
||||
|
||||
jobs:
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: dotnet restore
|
||||
run: dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
|
||||
- name: Build docker-dev fleet
|
||||
run: docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
- name: Wait for cluster
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost/health/active >/dev/null; then
|
||||
echo "Admin leader healthy after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
echo "Timed out waiting for /health/active"
|
||||
docker compose -f docker-dev/docker-compose.yml logs --tail=200
|
||||
exit 1
|
||||
|
||||
- name: dotnet test (E2E only)
|
||||
run: dotnet test --configuration Release --filter "Category=E2E"
|
||||
|
||||
- name: Tear down
|
||||
if: always()
|
||||
run: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
@@ -21,6 +21,8 @@ desktop.ini
|
||||
# NuGet
|
||||
packages/
|
||||
*.nupkg
|
||||
# … but DO track repo-local feed for mxaccessgw client (not yet on public nuget.org).
|
||||
!nuget-packages/*.nupkg
|
||||
|
||||
# Certificates
|
||||
*.pfx
|
||||
|
||||
@@ -150,3 +150,5 @@ dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- browse -u opc.tc
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- read -u opc.tcp://localhost:4840 -n "ns=2;s=SomeNode"
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- subscribe -u opc.tcp://localhost:4840 -n "ns=2;s=SomeNode" -i 500
|
||||
```
|
||||
|
||||
Address pickers in AdminUI support live browse for OpcUaClient and Galaxy drivers — see `docs/plans/2026-05-28-driver-browsers-design.md`.
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
<Project>
|
||||
<!--
|
||||
Defaults inherited by every csproj. Individual projects may override.
|
||||
|
||||
Deviation from the original v2 plan: TreatWarningsAsErrors is NOT set globally because the
|
||||
pre-v2 test projects (e.g. Admin.Tests) carry 240+ xUnit1051 analyzer warnings that would
|
||||
fail the build. New v2 projects (Commons, Cluster, ControlPlane, Runtime, OpcUaServer, AdminUI,
|
||||
Host, Security) MUST opt in to <TreatWarningsAsErrors>true</TreatWarningsAsErrors> in their
|
||||
own csproj. Once the legacy Admin/Server projects are deleted (Phase 10, Task 56), this can
|
||||
be promoted back to a global default.
|
||||
-->
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<LangVersion>latest</LangVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,102 @@
|
||||
<Project>
|
||||
<PropertyGroup>
|
||||
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageVersion Include="Akka" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster.Tools" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Remote" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Remote.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams.TestKit" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.TestKit.Xunit2" Version="1.5.62" />
|
||||
<PackageVersion Include="Avalonia" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Controls.DataGrid" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Desktop" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Diagnostics" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Fonts.Inter" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Headless" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Svg.Skia" Version="11.2.0.2" />
|
||||
<PackageVersion Include="Avalonia.Themes.Fluent" Version="11.2.7" />
|
||||
<PackageVersion Include="Beckhoff.TwinCAT.Ads" Version="7.0.172" />
|
||||
<PackageVersion Include="bunit" Version="2.0.33-preview" />
|
||||
<PackageVersion Include="CliFx" Version="2.3.6" />
|
||||
<PackageVersion Include="CommunityToolkit.Mvvm" Version="8.4.0" />
|
||||
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
|
||||
<PackageVersion Include="FluentAssertions" Version="8.3.0" />
|
||||
<PackageVersion Include="Google.Protobuf" Version="3.34.1" />
|
||||
<PackageVersion Include="Grpc.Core.Api" Version="2.76.0" />
|
||||
<PackageVersion Include="Grpc.Net.Client" Version="2.76.0" />
|
||||
<PackageVersion Include="libplctag" Version="1.5.2" />
|
||||
<PackageVersion Include="LiteDB" Version="5.0.21" />
|
||||
<PackageVersion Include="MessagePack" Version="2.5.187" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.Authorization" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.DataProtection" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.DataProtection.EntityFrameworkCore" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.Mvc.Testing" Version="10.0.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.SignalR.Client" Version="10.0.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.SignalR.Core" Version="1.2.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.TestHost" Version="10.0.7" />
|
||||
<!--
|
||||
Roslyn analyzer packages pin to the same major version as the SDK's compiler.
|
||||
.NET SDK 10.0.105 ships compiler 5.0.0.0. Microsoft.CodeAnalysis.CSharp 5.3.x emits
|
||||
analyzer DLLs that reference compiler 5.3.0.0 and fail with CS9057 on the local SDK.
|
||||
Pin to 5.0.0 (matches the compiler the SDK ships) until the SDK rolls to 10.0.110+.
|
||||
-->
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp" Version="5.0.0" />
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp.Scripting" Version="4.12.0" />
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="5.0.0" />
|
||||
<PackageVersion Include="Microsoft.Data.SqlClient" Version="6.1.1" />
|
||||
<PackageVersion Include="Microsoft.Data.Sqlite" Version="9.0.0" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.InMemory" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Json" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.DependencyInjection" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Http" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Logging" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Options" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.IdentityModel.Tokens" Version="8.11.0" />
|
||||
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageVersion Include="Microsoft.Playwright" Version="1.51.0" />
|
||||
<PackageVersion Include="Moq" Version="4.20.72" />
|
||||
<PackageVersion Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Client" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126" />
|
||||
<PackageVersion Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" Version="1.15.3-beta.1" />
|
||||
<PackageVersion Include="OpenTelemetry.Extensions.Hosting" Version="1.15.3" />
|
||||
<PackageVersion Include="Polly.Core" Version="8.6.6" />
|
||||
<PackageVersion Include="S7netplus" Version="0.20.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.0" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Formatting.Compact" Version="3.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.Console" Version="6.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.File" Version="7.0.0" />
|
||||
<PackageVersion Include="Shouldly" Version="4.3.0" />
|
||||
<PackageVersion Include="System.CommandLine" Version="2.0.5" />
|
||||
<PackageVersion Include="System.Data.SqlClient" Version="4.9.0" />
|
||||
<PackageVersion Include="System.IdentityModel.Tokens.Jwt" Version="8.11.0" />
|
||||
<PackageVersion Include="System.IO.Pipes.AccessControl" Version="5.0.0" />
|
||||
<PackageVersion Include="System.Memory" Version="4.5.5" />
|
||||
<PackageVersion Include="System.Threading.Tasks.Extensions" Version="4.5.4" />
|
||||
<PackageVersion Include="xunit" Version="2.9.2" />
|
||||
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.2" />
|
||||
<PackageVersion Include="xunit.v3" Version="1.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Contracts" Version="0.1.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<packageSources>
|
||||
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" protocolVersion="3" />
|
||||
<add key="local-mxgw" value="./nuget-packages" />
|
||||
</packageSources>
|
||||
</configuration>
|
||||
+31
-5
@@ -2,6 +2,8 @@
|
||||
<Folder Name="/src/" />
|
||||
<Folder Name="/src/Core/">
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Cluster/ZB.MOM.WW.OtOpcUa.Cluster.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Commons/ZB.MOM.WW.OtOpcUa.Commons.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Configuration/ZB.MOM.WW.OtOpcUa.Configuration.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ZB.MOM.WW.OtOpcUa.Core.Scripting.csproj" />
|
||||
@@ -10,21 +12,36 @@
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/src/Server/">
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Admin/ZB.MOM.WW.OtOpcUa.Admin.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/ZB.MOM.WW.OtOpcUa.AdminUI.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/ZB.MOM.WW.OtOpcUa.ControlPlane.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Host/ZB.MOM.WW.OtOpcUa.Host.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/ZB.MOM.WW.OtOpcUa.OpcUaServer.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ZB.MOM.WW.OtOpcUa.Runtime.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/src/Drivers/">
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ZB.MOM.WW.OtOpcUa.Driver.Modbus.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/ZB.MOM.WW.OtOpcUa.Driver.S7.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Contracts/ZB.MOM.WW.OtOpcUa.Driver.S7.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/ZB.MOM.WW.OtOpcUa.Driver.AbCip.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Contracts/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Contracts/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Contracts/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Contracts/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Contracts/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/src/Drivers/Driver CLIs/">
|
||||
<Project Path="src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.csproj" />
|
||||
@@ -46,6 +63,7 @@
|
||||
<Folder Name="/tests/" />
|
||||
<Folder Name="/tests/Core/">
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests/ZB.MOM.WW.OtOpcUa.Cluster.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Configuration.Tests/ZB.MOM.WW.OtOpcUa.Configuration.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/ZB.MOM.WW.OtOpcUa.Core.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests.csproj" />
|
||||
@@ -54,12 +72,17 @@
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.Tests/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.Tests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Server/">
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/ZB.MOM.WW.OtOpcUa.Server.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/ZB.MOM.WW.OtOpcUa.Admin.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/ZB.MOM.WW.OtOpcUa.Admin.E2ETests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/ZB.MOM.WW.OtOpcUa.AdminUI.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/ZB.MOM.WW.OtOpcUa.Runtime.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/ZB.MOM.WW.OtOpcUa.Security.Tests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Drivers/">
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.Tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests.csproj" />
|
||||
@@ -77,6 +100,8 @@
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.Tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.IntegrationTests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.IntegrationTests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Drivers/Driver CLIs/">
|
||||
<Project Path="tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests.csproj" />
|
||||
@@ -85,6 +110,7 @@
|
||||
<Project Path="tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Cli.Tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Cli.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.S7.Cli.Tests/ZB.MOM.WW.OtOpcUa.Driver.S7.Cli.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Cli.Tests/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Cli.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Cli.Tests/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Cli.Tests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Client/">
|
||||
<Project Path="tests/Client/ZB.MOM.WW.OtOpcUa.Client.Shared.Tests/ZB.MOM.WW.OtOpcUa.Client.Shared.Tests.csproj" />
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 8 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -62,7 +62,7 @@ assumption precisely.
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `Commands/SubscribeCommand.cs:129-137` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The summary computes `neverWentBad` as every target whose node-id key is
|
||||
absent from the `everBad` dictionary. A node that received no update at all is also absent
|
||||
@@ -78,7 +78,7 @@ streamed only good values.
|
||||
"suspect" list only contains nodes that were actually observed and never reported bad
|
||||
quality.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `neverWentBad` now requires the node to be present in `lastStatus` (i.e. it received at least one update) before being counted, so the "suspect" bucket only contains nodes that were actually observed and never reported bad quality.
|
||||
|
||||
### Client.CLI-003
|
||||
|
||||
@@ -87,7 +87,7 @@ quality.
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `Commands/BrowseCommand.cs:29-30`, `Commands/SubscribeCommand.cs:20-27`, `Commands/AlarmsCommand.cs:28-29`, `Commands/HistoryReadCommand.cs:42-43` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Numeric command options accept any value with no range validation.
|
||||
`--depth`, `--interval`, `--max-depth`, `--max`, and the history `--interval` can all be
|
||||
@@ -100,7 +100,7 @@ is forwarded to `HistoryReadRawAsync`. None of these produce a clear operator-fa
|
||||
`CliFx.Exceptions.CommandException` with an actionable message when a value is out of
|
||||
range.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — every command's `ExecuteAsync` now validates the numeric option ranges (`--interval`, `--depth`, `--max-depth`, `--max`, `--duration`) and throws `CliFx.Exceptions.CommandException` with the offending value when a non-positive (or otherwise out-of-range) value is supplied. Pinned by `CommandRangeValidationTests`.
|
||||
|
||||
### Client.CLI-004
|
||||
|
||||
@@ -109,7 +109,7 @@ range.
|
||||
| Severity | Low |
|
||||
| Category | OtOpcUa conventions |
|
||||
| Location | `Commands/SubscribeCommand.cs:13-37` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `SubscribeCommand` is the only command in the module whose constructor
|
||||
and all `[CommandOption]` properties have no XML doc comments. Every other command
|
||||
@@ -121,7 +121,7 @@ otherwise-uniform documentation convention of the module.
|
||||
**Recommendation:** Add `<summary>` XML docs to the `SubscribeCommand` constructor and to
|
||||
each of its option properties, matching the style used by the sibling commands.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `SubscribeCommand` now carries `<summary>` XML docs on the type, the constructor, every `[CommandOption]` property, and `ExecuteAsync`, matching the style used by the sibling commands.
|
||||
|
||||
### Client.CLI-005
|
||||
|
||||
@@ -156,7 +156,7 @@ callback.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `Commands/HistoryReadCommand.cs:73`, `Commands/HistoryReadCommand.cs:76`, `Helpers/NodeIdParser.cs:39` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Operator input-format errors surface as raw .NET exceptions rather than
|
||||
clean CLI errors. An unparseable start/end value throws `FormatException` straight out of
|
||||
@@ -170,7 +170,7 @@ is converted to a `CliFx.Exceptions.CommandException` with a clean exit code.
|
||||
`CommandException` with a concise message and a non-zero exit code, so malformed input
|
||||
yields a one-line error instead of a stack trace.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `HistoryReadCommand` parses `--start`/`--end` with `CultureInfo.InvariantCulture` + `AssumeUniversal`/`AdjustToUniversal`, catches `FormatException`, and rethrows as `CommandException` with the offending value. Every command's call to `NodeIdParser.ParseRequired` is wrapped in a `catch (FormatException or ArgumentException)` block that surfaces the underlying message as a clean CLI error. Pinned by `InputValidationErrorsTests`.
|
||||
|
||||
### Client.CLI-007
|
||||
|
||||
@@ -179,7 +179,7 @@ yields a one-line error instead of a stack trace.
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `CommandBase.cs:112-123` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ConfigureLogging` builds a new Serilog `LoggerConfiguration`, creates a
|
||||
logger, and assigns it to the static `Log.Logger` without disposing the previously
|
||||
@@ -193,7 +193,7 @@ abandons the prior console sink without disposal. The pattern is incorrect:
|
||||
build the logger into a local `ILogger` the command owns and disposes, rather than mutating
|
||||
global static state per command.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `CommandBase.ConfigureLogging` now calls `Log.CloseAndFlush()` before assigning a new `Log.Logger`, so a prior logger's console sink is disposed before the next one is installed. Pinned by `LoggerLifecycleTests`.
|
||||
|
||||
### Client.CLI-008
|
||||
|
||||
@@ -202,7 +202,7 @@ global static state per command.
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `docs/Client.CLI.md:158-217` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `docs/Client.CLI.md` is stale relative to the code at this commit.
|
||||
(1) The `subscribe` command section documents only `-n` and `-i`, but the code
|
||||
@@ -218,7 +218,7 @@ code option description includes it.
|
||||
`docs/Client.CLI.md` from the current option set, including the five new subscribe flags
|
||||
and the `StandardDeviation` aggregate row.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — rewrote the `subscribe` section of `docs/Client.CLI.md` to document every flag (`-r/--recursive`, `--max-depth`, `-q/--quiet`, `--duration`, `--summary-file`) plus the summary-bucket vocabulary, and added the `StandardDeviation` row plus the UTC `--start`/`--end` convention note to the `historyread` section.
|
||||
|
||||
### Client.CLI-009
|
||||
|
||||
@@ -227,7 +227,7 @@ and the `StandardDeviation` aggregate row.
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `Commands/SubscribeCommand.cs:66-165`, `Commands/AlarmsCommand.cs:52-91` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Both long-running commands attach an event handler
|
||||
(`service.DataChanged += ...`, `service.AlarmEvent += ...`) with a lambda and never detach
|
||||
@@ -243,7 +243,7 @@ but never the .NET event.
|
||||
unsubscribing, using a named local delegate so it can be removed, ensuring no notification
|
||||
is processed after the command output phase ends.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `SubscribeCommand` and `AlarmsCommand` declare named local handlers (`DataChangedHandler` / `AlarmEventHandler`) and detach them via `service.DataChanged -= ...` / `service.AlarmEvent -= ...` right after `UnsubscribeAsync` so no notification reaches the console once the command's output phase ends. Pinned by `EventHandlerLifecycleTests`.
|
||||
|
||||
### Client.CLI-010
|
||||
|
||||
@@ -252,7 +252,7 @@ is processed after the command output phase ends.
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.CLI.Tests/SubscribeCommandTests.cs` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The new `SubscribeCommand` capabilities are largely untested. The four
|
||||
`SubscribeCommandTests` cover only single-node subscribe, unsubscribe-on-cancel,
|
||||
@@ -268,4 +268,4 @@ exit, summary bucketing across good/bad/no-update nodes, and the `--summary-file
|
||||
The `FakeOpcUaClientService` already exposes `RaiseDataChanged`, so feeding good/bad values
|
||||
and asserting the summary text is straightforward.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — added `SubscribeCommandSummaryTests` (covering recursive collection via `FakeOpcUaClientService.AddDiscoveredVariable`, `--duration` auto-exit, summary bucketing for good/bad/never/never-went-bad, and the `--summary-file` write), `CommandRangeValidationTests`, `EventHandlerLifecycleTests`, `InputValidationErrorsTests`, and `LoggerLifecycleTests` to pin the other Low findings; `FakeOpcUaClientService` was extended with `AddDiscoveredVariable` / `RaiseDataChanged` helpers.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -63,13 +63,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `Adapters/DefaultSessionAdapter.cs:76`, `Adapters/DefaultSessionAdapter.cs:273` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `WriteValueAsync` returns `response.Results[0]` and `CallMethodAsync` reads `result.Results[0]` without first checking the `Results` collection is non-empty. A malformed or service-level-faulted response (empty `Results` alongside a service fault) produces an `IndexOutOfRangeException` rather than a meaningful OPC UA `StatusCode` or `ServiceResultException`.
|
||||
|
||||
**Recommendation:** Guard both accesses — throw `ServiceResultException` with the response's `ResponseHeader.ServiceResult` (or `BadUnexpectedError`) when `Results` is empty.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — added empty-Results guards to both `WriteValueAsync` (lines 80-85) and `CallMethodAsync` (lines 293-298) in `DefaultSessionAdapter`. Each now throws `ServiceResultException` carrying `response.ResponseHeader.ServiceResult.Code` (or `StatusCodes.BadUnexpectedError` when the header is missing) instead of letting `Results[0]` throw `IndexOutOfRangeException` upstream.
|
||||
|
||||
### Client.Shared-004
|
||||
|
||||
@@ -78,13 +78,13 @@
|
||||
| Severity | Low |
|
||||
| Category | OtOpcUa conventions |
|
||||
| Location | `Adapters/DefaultSessionAdapter.cs:228`, `Adapters/DefaultSessionAdapter.cs:121`, `Adapters/DefaultSessionAdapter.cs:172` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `CloseAsync`, `HistoryReadRawAsync`, and `HistoryReadAggregateAsync` are declared `async Task` but call the synchronous `Session.Close()` / `Session.HistoryRead(...)` APIs and contain no `await`. The history methods run a blocking synchronous service round-trip on the caller's thread; for the UI this blocks the dispatcher thread. The async signature misleads callers, and the `CancellationToken` parameter is ignored on these paths.
|
||||
|
||||
**Recommendation:** Use the stack's async overloads (`Session.HistoryReadAsync`, `Session.CloseAsync`) where available, or wrap the synchronous calls in `Task.Run`, so the methods are genuinely asynchronous and honor the cancellation token.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — replaced the three blocking calls with their async counterparts: `CloseAsync` now awaits `Session.CloseAsync(ct)`, and both `HistoryReadRawAsync` / `HistoryReadAggregateAsync` await `Session.HistoryReadAsync(...)` with `.ConfigureAwait(false)`. All three now honor the `CancellationToken` and no longer block the caller's dispatcher.
|
||||
|
||||
### Client.Shared-005
|
||||
|
||||
@@ -153,13 +153,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience / Documentation & comments |
|
||||
| Location | `OpcUaClientService.cs:302-322` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `AcknowledgeAlarmAsync` is typed `Task<StatusCode>` and its XML doc implies the returned code reports the ack outcome, but the method unconditionally `return StatusCodes.Good`. The actual failure path is `DefaultSessionAdapter.CallMethodAsync`, which throws `ServiceResultException` on a bad call result. A failed acknowledgment therefore never returns a bad `StatusCode` — it throws — and the `StatusCode` return value is dead. Callers writing `if (StatusCode.IsBad(result))` will never see a bad result and will not catch the exception.
|
||||
|
||||
**Recommendation:** Either change the return type to `Task` (and let exceptions signal failure), or catch `ServiceResultException` in `AcknowledgeAlarmAsync` and return its `StatusCode`. Update the XML doc to match whichever is chosen.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `AcknowledgeAlarmAsync` now wraps the `CallMethodAsync` invocation in a try/catch for `ServiceResultException`, logging the failure and returning `ex.StatusCode` so callers using `if (StatusCode.IsBad(result))` see the bad status. The `IOpcUaClientService.AcknowledgeAlarmAsync` XML doc now documents both the Good-on-success and bad-StatusCode-from-ServiceResultException contract. Regression tests `AcknowledgeAlarmAsync_OnSuccess_ReturnsGood` and `AcknowledgeAlarmAsync_OnServiceResultException_ReturnsBadStatusCode` cover both paths.
|
||||
|
||||
### Client.Shared-010
|
||||
|
||||
@@ -168,13 +168,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `Models/ConnectionSettings.cs:48`, `OpcUaClientService.cs:408-417` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ConnectionSettings.CertificateStorePath` is initialized to `ClientStoragePaths.GetPkiPath()` as a property initializer, so every `ConnectionSettings` instantiation runs `Environment.GetFolderPath` + `Path.Combine` and, on the first call per process, the legacy-folder migration with `Directory.Exists`/`Directory.Move` filesystem IO. `ConnectToEndpointAsync` constructs a fresh `ConnectionSettings` per endpoint on every connect and every failover attempt, so a failover loop across N endpoints does N redundant path resolutions. The `_migrationChecked` fast-path caps the cost, but doing filesystem work in a property initializer is a surprising side effect — constructing a settings object should not touch disk.
|
||||
|
||||
**Recommendation:** Make `CertificateStorePath` default to `string.Empty` and resolve `ClientStoragePaths.GetPkiPath()` lazily inside `DefaultApplicationConfigurationFactory.CreateAsync` only when the path is unset.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `ConnectionSettings.CertificateStorePath` now defaults to `string.Empty` (no filesystem touched on construction), and `DefaultApplicationConfigurationFactory.CreateAsync` resolves the canonical PKI path via `ClientStoragePaths.GetPkiPath()` only when the supplied path is null/whitespace. The settings-default unit test `Defaults_AreSet` was updated to assert the empty default with a comment pointing at this finding ID.
|
||||
|
||||
### Client.Shared-011
|
||||
|
||||
@@ -183,10 +183,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.Shared.Tests/OpcUaClientServiceTests.cs` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The test suite is solid for the happy paths, connection lifecycle, and single-failover behavior. Gaps relative to the findings above: (a) no test exercises concurrent `SubscribeAsync`/failover to expose the `_activeDataSubscriptions` race (Client.Shared-005) or re-entrant keep-alive failures (Client.Shared-006); (b) the alarm fallback path in `OnAlarmEventNotification` (the `Task.Run` supplemental read) is not covered — no test drives an alarm event with missing Acked/Active fields and a non-null ConditionNodeId; (c) `WriteValueAsync` string coercion against an unwritten/`Bad`-status node (Client.Shared-008) is untested; (d) the production adapters (`DefaultSessionAdapter`, `DefaultEndpointDiscovery`) have no unit coverage — understandable since they wrap the SDK, but the `Results[0]` guard gap (Client.Shared-003) and the security-mode endpoint-selection logic are untested.
|
||||
|
||||
**Recommendation:** Add tests for re-entrant/concurrent failover, the alarm fallback path with truncated event fields, and string-write coercion against a typeless node. Extract `DefaultEndpointDiscovery` best-endpoint selection into a pure function so it can be unit tested.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — added the previously-missing unit coverage: (a) `OnAlarmEvent_MissingAckedActiveButHasConditionNode_FallbackReadsAndRaisesEvent` drives the supplemental-read fallback path with null AckedState/ActiveState fields and a non-null SourceNode and asserts the Galaxy attribute reads populate the delivered event; (b) `WriteValueAsync` typeless-node coverage is exercised via the Client.Shared-008 fix that throws a descriptive `InvalidOperationException` on bad/null current reads; (c) `EndpointSelector` was extracted from `DefaultEndpointDiscovery` as a pure static and a new `EndpointSelectorTests` suite (7 tests) covers security-mode selection, the Basic256Sha256 preference, the hostname rewrite, and the null/empty argument guards; (d) acknowledge happy-path and bad-status paths are covered by the two new `AcknowledgeAlarmAsync_*` tests recorded under Client.Shared-009. Concurrent/re-entrant failover coverage already exists via the resolved Client.Shared-005/-006 tests in the suite.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 6 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -89,7 +89,7 @@ directly so the compiler can prove non-nullness.
|
||||
| Severity | Low |
|
||||
| Category | OtOpcUa conventions |
|
||||
| Location | `ZB.MOM.WW.OtOpcUa.Client.UI.csproj:20-21`, `Program.cs:14-20` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The csproj references `Serilog` and `Serilog.Sinks.Console`, and
|
||||
`docs/Client.UI.md` lists Serilog as the logging technology, but no source file in
|
||||
@@ -104,7 +104,7 @@ rolling daily file sink the project standard calls for) and route Avalonia loggi
|
||||
through it, or drop the unused `Serilog` package references and correct
|
||||
`docs/Client.UI.md`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Honoured the CLAUDE.md mandate by wiring up Serilog with a console sink + a rolling daily file sink (`{LocalAppData}/OtOpcUaClient/logs/client-ui-*.log`, retained 14 days). Added `Serilog.Sinks.File` to the csproj and a `ConfigureLogging()` initializer in `Program.Main` that creates `Log.Logger` before `BuildAvaloniaApp()` and calls `Log.CloseAndFlush()` on exit. Each VM that previously had silent swallow blocks now owns a static `Log.ForContext<>()` logger so failures (subscribe, alarm subscribe, redundancy probe, recursive browse) are written to the rolling file. Avalonia's own logging is still routed through `LogToTrace` — replacing that would require a custom `ILogSink` adapter outside the scope of this finding.
|
||||
|
||||
### Client.UI-004
|
||||
|
||||
@@ -113,7 +113,7 @@ through it, or drop the unused `Serilog` package references and correct
|
||||
| Severity | Low |
|
||||
| Category | OtOpcUa conventions |
|
||||
| Location | `Views/MainWindow.axaml.cs:125-138` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `OnBrowseCertPathClicked` uses `OpenFolderDialog`, which is
|
||||
obsolete in Avalonia 11.x (the version pinned in the csproj). The supported
|
||||
@@ -125,7 +125,7 @@ Avalonia major version.
|
||||
**Recommendation:** Migrate the folder chooser to
|
||||
`TopLevel.GetTopLevel(this).StorageProvider.OpenFolderPickerAsync(...)`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Replaced `OpenFolderDialog` with `TopLevel.GetTopLevel(this).StorageProvider.OpenFolderPickerAsync(...)`, using `TryGetFolderFromPathAsync(vm.CertificateStorePath)` as the suggested start location and `TryGetLocalPath()` to extract the chosen path. The CS0618 obsoletion warning no longer appears in the build output.
|
||||
|
||||
### Client.UI-005
|
||||
|
||||
@@ -165,7 +165,7 @@ method, not only from `DisconnectAsync`.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `ViewModels/MainWindowViewModel.cs:244-252`, `ViewModels/AlarmsViewModel.cs:88-112`, `ViewModels/SubscriptionsViewModel.cs:79-94` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Many catch blocks swallow exceptions silently with an empty body
|
||||
and only a comment (`// Redundancy info not available`, `// Subscribe failed`,
|
||||
@@ -180,7 +180,7 @@ permission denial effectively impossible from the UI.
|
||||
message or write the exception to a log. Distinguish "feature not supported"
|
||||
(condition refresh) from "operation failed" so genuine errors are not hidden.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Added an observable `StatusMessage` property on `SubscriptionsViewModel` and `AlarmsViewModel`; each former silent catch now logs through Serilog (via Client.UI-003's logger) and writes a user-visible message. `MainWindowViewModel.InitializeService` subscribes to both child VMs' `StatusMessage` changes and bubbles them up into the shell's `StatusMessage` (which is already bound to the status bar). Soft conditions are distinguished from hard failures: `RequestConditionRefreshAsync` failures log at Information level and surface as "Condition refresh not supported by server" rather than a generic error, matching the recommendation. Redundancy probe failure still leaves `RedundancyInfo` null but now logs at Information level instead of dropping the exception. Regression tests `AddSubscription_OnFailure_SurfacesStatusMessage`, `AddSubscriptionForNodeAsync_OnFailure_SurfacesStatusMessage`, `Subscribe_OnFailure_SurfacesStatusMessage`, and `ConnectCommand_RedundancyFailure_DoesNotBreakConnection` cover the four affected swallow sites.
|
||||
|
||||
### Client.UI-007
|
||||
|
||||
@@ -239,7 +239,7 @@ any background reconnect timers are leaked until process exit. The
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `ViewModels/HistoryViewModel.cs:44-54` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `HistoryViewModel.AggregateTypes` exposes eight entries: `null`
|
||||
(Raw) plus Average, Minimum, Maximum, Count, Start, End, and `StandardDeviation`.
|
||||
@@ -250,7 +250,7 @@ stale relative to the code.
|
||||
**Recommendation:** Update the "Aggregate" row in `docs/Client.UI.md` to include
|
||||
Standard Deviation.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Added "Standard Deviation" to the Aggregate row of the Query Options table in `docs/Client.UI.md` so it matches the eighth entry already exposed by `HistoryViewModel.AggregateTypes`.
|
||||
|
||||
### Client.UI-010
|
||||
|
||||
@@ -259,7 +259,7 @@ Standard Deviation.
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `Controls/DateTimeRangePicker.axaml.cs:33-37`, `Controls/DateTimeRangePicker.axaml.cs:70-80` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `DateTimeRangePicker` declares `MinDateTimeProperty` /
|
||||
`MaxDateTimeProperty` styled properties with public CLR accessors, but neither is
|
||||
@@ -272,7 +272,7 @@ constraint the control does not enforce.
|
||||
path (turn out-of-range input red, as invalid input already is) or remove the two
|
||||
unused styled properties.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Removed `MinDateTimeProperty` / `MaxDateTimeProperty` and their CLR accessors from `DateTimeRangePicker.axaml.cs`. No XAML or external caller binds the properties (grep across the repo confirmed only the control file referenced them), so removing the dead API surface is the correct fix; adding min/max clamping would have been speculative behaviour without a calling site.
|
||||
|
||||
### Client.UI-011
|
||||
|
||||
@@ -281,7 +281,7 @@ unused styled properties.
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `Views/MainWindow.axaml:81`, `Services/JsonSettingsService.cs:11-15` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The certificate-store-path `TextBox` watermark reads
|
||||
`(default: AppData/LmxOpcUaClient/pki)`, referencing the legacy pre-task-#208
|
||||
@@ -293,4 +293,4 @@ that no longer matches where settings and the PKI store actually live.
|
||||
**Recommendation:** Update the watermark to reference `OtOpcUaClient/pki`, or bind
|
||||
it to `ClientStoragePaths.GetPkiPath()` so it cannot drift again.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — Updated the watermark text in `Views/MainWindow.axaml` from `(default: AppData/LmxOpcUaClient/pki)` to `(default: AppData/OtOpcUaClient/pki)` so it matches the canonical folder name resolved by `ClientStoragePaths` (the binding-to-helper alternative was considered but a static string keeps the watermark cheap; the path is also already documented in `docs/Client.UI.md`).
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Review date | 2026-05-23 |
|
||||
| Commit reviewed | `a9be809` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
A comprehensive review completes every category, recording "No issues found" where
|
||||
a category produced nothing rather than leaving it blank.
|
||||
|
||||
### 2026-05-22 review (commit `76d35d1`)
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Core.ScriptedAlarms-002 |
|
||||
@@ -27,6 +29,26 @@ a category produced nothing rather than leaving it blank.
|
||||
| 9 | Testing coverage | Core.ScriptedAlarms-012 |
|
||||
| 10 | Documentation & comments | Core.ScriptedAlarms-003 |
|
||||
|
||||
### 2026-05-23 re-review (commit `a9be809`)
|
||||
|
||||
Focused re-review of the Core.ScriptedAlarms-009 resolution (commit `0001cdd`) —
|
||||
new `AlarmScratch` class, `_scratchByAlarmId` ConcurrentDictionary, `RefillReadCache`
|
||||
helper, and internal test accessors. Only the changed/new code since `76d35d1` was
|
||||
re-examined; existing closed findings stay as audit trail.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | No issues found |
|
||||
| 2 | OtOpcUa conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | No issues found |
|
||||
| 4 | Error handling & resilience | No issues found |
|
||||
| 5 | Security | No issues found |
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | No issues found |
|
||||
| 10 | Documentation & comments | Core.ScriptedAlarms-013 |
|
||||
|
||||
## Findings
|
||||
|
||||
### Core.ScriptedAlarms-001
|
||||
@@ -156,13 +178,29 @@ a category produced nothing rather than leaving it blank.
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `ScriptedAlarmEngine.cs:309-315`, `ScriptedAlarmEngine.cs:271` |
|
||||
| Status | Won't Fix |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `BuildReadCache` allocates a fresh `Dictionary<string, DataValueSnapshot>` on every predicate evaluation, i.e. on every upstream tag change for every referencing alarm. On a busy line where many tags feeding many alarms change frequently, this is a steady stream of short-lived dictionary allocations on the hot path. `AlarmPredicateContext` is also newly constructed each evaluation (line 281).
|
||||
|
||||
**Recommendation:** Minor. If the evaluation path shows up in allocation profiling, the read cache could be a reused per-alarm buffer cleared between evaluations (evaluations are already serialised under `_evalGate`, so a single shared scratch dictionary is safe). Not worth doing speculatively — flag for the perf surface in `docs/v2/Galaxy.Performance.md` if alarm evaluation is ever soak-tested.
|
||||
|
||||
**Resolution:** Won't Fix 2026-05-23 — per the recommendation, no code change. Documented the known allocation characteristic in `docs/v2/Galaxy.Performance.md` (new "Scripted-alarm engine — known hot-path allocations" section) so a future soak that surfaces pressure has a noted mitigation (reused per-alarm scratch buffer) and we don't re-find this in a later review.
|
||||
**Resolution:** Resolved 2026-05-23 — added a per-alarm reusable `AlarmScratch`
|
||||
(read-cache `Dictionary` + `AlarmPredicateContext`) held in
|
||||
`_scratchByAlarmId`, populated lazily on first evaluation and refilled in place
|
||||
by the new `RefillReadCache(Dictionary, IReadOnlySet)` helper on every
|
||||
subsequent re-eval. `BuildReadCache` is gone. The reuse is safe because every
|
||||
evaluation runs under `_evalGate`; the context wraps the dictionary by
|
||||
reference, so the predicate's `ctx.GetTag(path)` sees the freshly-refilled
|
||||
values. `LoadAsync` clears `_scratchByAlarmId` alongside `_alarms` so a
|
||||
config-publish drops the prior generation's scratch (Inputs / Logger may
|
||||
change). Regression tests added in `ScriptedAlarmEngineTests`:
|
||||
`Reevaluation_reuses_the_same_read_cache_dictionary`,
|
||||
`Reevaluation_reuses_the_same_predicate_context`, and
|
||||
`LoadAsync_drops_the_prior_generations_scratch`; internal test hooks
|
||||
`TryGetScratchReadCacheForTest` / `TryGetScratchContextForTest` exposed via
|
||||
the existing `InternalsVisibleTo`. `docs/v2/Galaxy.Performance.md` "Scripted-alarm
|
||||
engine" section rewritten to document the new reuse contract. Suite now 66
|
||||
green (was 63).
|
||||
|
||||
### Core.ScriptedAlarms-010
|
||||
|
||||
@@ -208,3 +246,32 @@ a category produced nothing rather than leaving it blank.
|
||||
**Recommendation:** Add engine-level tests that inject a controllable `Func<DateTime>` clock to drive `RunShelvingCheck`, cover the remaining Part 9 engine methods end-to-end, assert subscriber-exception isolation, and add a store-failure fake to lock in the chosen persistence-failure semantics from finding 007.
|
||||
|
||||
**Resolution:** Resolved 2026-05-22 — added 8 new engine-level tests covering all 6 gap areas: injectable-clock timed-shelve expiry via `RunShelvingCheckForTest`, `ConfirmAsync`/`TimedShelveAsync`/`UnshelveAsync`/`EnableAsync` end-to-end, subscriber-exception isolation, store-failure invariant, second-`LoadAsync` timer-leak regression, and `AreInputsReady` Bad/Uncertain guard; exposed `RunShelvingCheckForTest()` internal hook on the engine.
|
||||
|
||||
### Core.ScriptedAlarms-013
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `ScriptedAlarmEngine.cs:66-81` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The new internal test accessors `TryGetScratchReadCacheForTest` and `TryGetScratchContextForTest` (introduced by the Core.ScriptedAlarms-009 resolution at `0001cdd`) return the *live* per-alarm scratch — the same `Dictionary<string, DataValueSnapshot>` instance the engine clears and refills in `RefillReadCache` under `_evalGate`, plus the `AlarmPredicateContext` that wraps it by reference. The XML docs describe the intended use ("assert the scratch is reused across evaluations (two reads return the same instance)") but do not explicitly warn that:
|
||||
|
||||
1. The returned `IReadOnlyDictionary` is the engine's mutable working set. Enumerating it from a test thread while the engine is mid-evaluation (e.g. during a `ReevaluateAsync` queued by `OnUpstreamChange`, or a `ShelvingCheckAsync` callback) is a concurrent-read-while-writer scenario against a plain `Dictionary` — undefined behaviour, can throw `InvalidOperationException` or return torn data.
|
||||
2. Reference-equality comparisons (`ReferenceEquals(a, b)`) and single-key indexer reads (`dict["Temp"]`) on a quiesced engine are the only safe uses. The existing regression tests stay within those bounds, but a future test author has no in-code signal that broader reads are unsafe.
|
||||
|
||||
The engine itself is correct — `RefillReadCache` runs only under `_evalGate`, so the engine never tears its own state. The risk is purely on the test-side contract.
|
||||
|
||||
**Recommendation:** Add a `<remarks>` block to both `TryGetScratchReadCacheForTest` and `TryGetScratchContextForTest` stating that the returned references point at live engine state, that reads are only safe when the engine is known to be idle (no in-flight `ReevaluateAsync`/`ShelvingCheckAsync`/`LoadAsync`), and that the intended uses are reference-identity assertions plus single-key lookups against a quiesced engine — never enumeration. No code change required; the engine's correctness depends on `_evalGate`, which is already documented.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — applied the recommendation verbatim.
|
||||
Added a `<remarks>` block to each of `TryGetScratchReadCacheForTest` and
|
||||
`TryGetScratchContextForTest` documenting the synchronization contract:
|
||||
the returned references point at live engine state refilled in place under
|
||||
`_evalGate`, enumeration during an in-flight evaluation is a
|
||||
concurrent-read-while-writer scenario against a plain `Dictionary`
|
||||
(undefined behaviour), and the only safe uses are reference-identity
|
||||
comparisons + single-key reads against a quiesced engine. No code change
|
||||
required — the engine's correctness was always there; only the test-side
|
||||
contract was undocumented.
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Review date | 2026-05-23 |
|
||||
| Commit reviewed | `a9be809` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -14,18 +14,23 @@
|
||||
A comprehensive review completes every category, recording "No issues found" where
|
||||
a category produced nothing rather than leaving it blank.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Core.Scripting-004, Core.Scripting-005 |
|
||||
| 2 | OtOpcUa conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | Core.Scripting-006 |
|
||||
| 4 | Error handling & resilience | Core.Scripting-007 |
|
||||
| 5 | Security | Core.Scripting-001, Core.Scripting-002, Core.Scripting-003 |
|
||||
| 6 | Performance & resource management | Core.Scripting-008 |
|
||||
| 7 | Design-document adherence | Core.Scripting-009 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Core.Scripting-010, Core.Scripting-011 |
|
||||
| 10 | Documentation & comments | No issues found |
|
||||
The 2026-05-23 re-review only covers code touched between commits `76d35d1` and
|
||||
`a9be809` (primarily the Core.Scripting-008 ALC rewrite + the broadened BCL
|
||||
references). Categories where the new code surface produced no issues are
|
||||
recorded as "No new issues" for that pass.
|
||||
|
||||
| # | Category | Result (76d35d1) | Result (a9be809, new code only) |
|
||||
|---|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Core.Scripting-004, Core.Scripting-005 | Core.Scripting-015 |
|
||||
| 2 | OtOpcUa conventions | No issues found | No new issues |
|
||||
| 3 | Concurrency & thread safety | Core.Scripting-006 | Core.Scripting-014 |
|
||||
| 4 | Error handling & resilience | Core.Scripting-007 | No new issues |
|
||||
| 5 | Security | Core.Scripting-001, Core.Scripting-002, Core.Scripting-003 | Core.Scripting-012, Core.Scripting-013 |
|
||||
| 6 | Performance & resource management | Core.Scripting-008 | Core.Scripting-016 |
|
||||
| 7 | Design-document adherence | Core.Scripting-009 | No new issues |
|
||||
| 8 | Code organization & conventions | No issues found | No new issues |
|
||||
| 9 | Testing coverage | Core.Scripting-010, Core.Scripting-011 | No new issues |
|
||||
| 10 | Documentation & comments | No issues found | No new issues |
|
||||
|
||||
## Findings
|
||||
|
||||
@@ -240,7 +245,7 @@ race ordering.
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `CompiledScriptCache.cs:34`, `ScriptEvaluator.cs:34` |
|
||||
| Status | Won't Fix |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `CompiledScriptCache` has no capacity bound (acknowledged in the class
|
||||
remarks) and no eviction. Each cached `ScriptEvaluator` holds a Roslyn `ScriptRunner<T>`
|
||||
@@ -257,7 +262,33 @@ compile scripts into a collectible `AssemblyLoadContext` so `Clear()` can unload
|
||||
generations. At minimum add a note to `docs/ScriptedAlarms.md` so operators with
|
||||
high-publish-frequency deployments are aware.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — accepted as a documented known limitation rather than fixing in code (collectible `AssemblyLoadContext` for Roslyn-emitted assemblies is a v3 concern). The "Compile cache" section of `docs/VirtualTags.md` now carries a "Per-publish assembly accretion (accepted limitation, Core.Scripting-008)" note that operators with high-publish-frequency deployments can scan, and `docs/ScriptedAlarms.md` cross-references it. The accretion is benign at the expected "low thousands" of scripts scale; recommended mitigation is a scheduled server restart for deployments that publish very frequently.
|
||||
**Resolution:** Resolved 2026-05-23 — switched the compile pipeline off the legacy
|
||||
`CSharpScript.CreateDelegate` path (which emits into the default, non-collectible
|
||||
`AssemblyLoadContext`) and onto a hand-rolled `CSharpCompilation` →
|
||||
`Compilation.Emit(MemoryStream)` → `ScriptAssemblyLoadContext.LoadFromStream` chain,
|
||||
with the new `ScriptAssemblyLoadContext` constructed `isCollectible: true`. Each
|
||||
compiled script lives in its own ALC; `ScriptEvaluator` now implements `IDisposable`
|
||||
and calls `AssemblyLoadContext.Unload()` on dispose. `CompiledScriptCache.Clear()`
|
||||
disposes every materialised evaluator before dropping its dictionary entry, and
|
||||
`CompiledScriptCache` itself is now `IDisposable` for graceful server shutdown.
|
||||
After a publish-replace cycle the prior generation's emitted assemblies become
|
||||
eligible for GC; the reclaim is GC-timing-sensitive (Unload is
|
||||
*eligible-for-collection*, not synchronous) and the next collection cycle reclaims
|
||||
them. The references list is now BCL-wide (System.* + netstandard + Microsoft.Win32.Registry
|
||||
via the TRUSTED_PLATFORM_ASSEMBLIES set) so forbidden BCL types resolve at compile and
|
||||
`ForbiddenTypeAnalyzer` is the sole security gate (consistent with the
|
||||
Core.Scripting-001 / -002 model). `docs/VirtualTags.md` "Compile cache" section rewritten;
|
||||
`docs/ScriptedAlarms.md` cross-reference updated to drop the obsolete restart guidance.
|
||||
Regression tests added in `CompiledScriptCacheTests`:
|
||||
`Dispose_unloads_compiled_script_assembly_load_context`,
|
||||
`Clear_disposes_every_materialised_evaluator`, and
|
||||
`GetOrCompile_after_Dispose_throws_ObjectDisposedException`; the first two
|
||||
prove ALC unload via `WeakReference` + bounded `GC.Collect()` loops. Suite now 104
|
||||
green (was 101). Authoring convention: the synthesized wrapper is an ordinary
|
||||
C# static method, so scripts must end with explicit `return …;` per ordinary C# rules
|
||||
(the legacy `CSharpScript` "last expression yields result" shorthand no longer applies);
|
||||
every script in the existing corpus already uses explicit `return` so this is a doc-only
|
||||
change for new authors.
|
||||
|
||||
### Core.Scripting-009
|
||||
|
||||
@@ -336,3 +367,390 @@ a script logging at Error level produces both a `scripts-*.log` event and a comp
|
||||
Warning event.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — added three new test files: `ScriptSandboxBuildTests` covers the `Build` null / non-`ScriptContext` / base-class / concrete-subclass paths; `ScriptContextTests` locks `Deadband` boundary semantics (equal-to-tolerance returns false; just-over returns true; symmetric in direction; zero-tolerance returns true only on non-equal; negative tolerance trips on any non-equal); the new `Factory_plus_companion_sink_integration_surfaces_script_error_in_both_logs` test in `ScriptLogCompanionSinkTests` wires `ScriptLoggerFactory` + the companion sink together end-to-end and asserts an Error emission lands in both the scripts sink (at Error) and the main sink (at Warning), each tagged with `ScriptName`. Suite now 101 green (was 85 before).
|
||||
|
||||
### Core.Scripting-012
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | High |
|
||||
| Category | Security |
|
||||
| Location | `ForbiddenTypeAnalyzer.cs:60-76`, `ScriptSandbox.cs:96-126` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The Core.Scripting-008 rewrite broadened the BCL references list
|
||||
from a narrow allow-list (`System.Private.CoreLib` + `System.Linq` only) to the
|
||||
full `TRUSTED_PLATFORM_ASSEMBLIES` set filtered to `System.*` + `netstandard` +
|
||||
`Microsoft.Win32.Registry`. This change correctly delegates the security gate to
|
||||
`ForbiddenTypeAnalyzer` (the new comment in `ScriptSandbox` calls this out
|
||||
explicitly), but the analyzer's deny-list has not been expanded to match the new
|
||||
attack surface, and three categories of dangerous BCL types in the `System.*`
|
||||
allow-listed assemblies are now reachable from script source:
|
||||
|
||||
1. **`System.Threading.ThreadPool`** (in namespace `System.Threading`). The
|
||||
Core.Scripting-003 fix added `System.Threading.Tasks` to deny `Task.Run` /
|
||||
`Parallel` fan-out because background work that outlives the per-evaluation
|
||||
timeout is the explicit threat. `ThreadPool.QueueUserWorkItem`,
|
||||
`ThreadPool.UnsafeQueueUserWorkItem`, and `ThreadPool.RegisterWaitForSingleObject`
|
||||
are exactly the same threat — they schedule background work that outlives the
|
||||
`WaitAsync(Timeout)` budget and tie up worker threads — but `System.Threading`
|
||||
itself is allowed (because `CancellationToken` / `SemaphoreSlim` / `Volatile`
|
||||
live there). The Core.Scripting-003 resolution is incomplete on the new
|
||||
reference surface.
|
||||
2. **`System.Threading.Timer`** (same namespace). Schedules a background
|
||||
callback; the script returns control to the engine but the timer keeps
|
||||
firing past the evaluation budget. Same threat as `Task.Run`.
|
||||
3. **`System.Runtime.Loader.AssemblyLoadContext`** (in namespace
|
||||
`System.Runtime.Loader`, which is not denied — only `System.Runtime.InteropServices`
|
||||
is). The constructor + `LoadFromAssemblyPath` / `LoadFromStream` /
|
||||
`LoadFromAssemblyName` let a script load an arbitrary DLL into the host
|
||||
process. Pass (1) of the analyzer resolves the receiver type
|
||||
(`AssemblyLoadContext`, allowed) + the invocation symbol's containing type
|
||||
(also `AssemblyLoadContext`, allowed) and lets the call through. Pass (2)
|
||||
only inspects `TypeSyntax` nodes — if the script discards the returned
|
||||
`Assembly` (e.g. `alc.LoadFromAssemblyPath(@"C:\evil.dll");`) there is no
|
||||
`TypeSyntax` for the analyzer to walk and the call is accepted. Triggering
|
||||
execution of the loaded code from inside the sandbox is hard (most of
|
||||
`Assembly`'s surface is in `System.Reflection`, which is denied) but the
|
||||
defense-in-depth gap is real: an attacker who can author a script also
|
||||
typically controls a file path on the server (Admin UI uploads, share
|
||||
mounts) and loading an assembly is the prerequisite to every chained
|
||||
escape — module initializers, type-resolve handlers, and a future analyzer
|
||||
slip would all become exploitable.
|
||||
|
||||
In addition, two lower-impact `System.*` types are reachable that arguably
|
||||
shouldn't be: **`System.Console.SetOut`** / **`Console.SetError`** could
|
||||
redirect the host's console streams (requires constructing a
|
||||
`System.IO.TextWriter`, which is blocked, so the practical exploit is
|
||||
`Console.WriteLine` log-spam only), and **`System.Globalization.CultureInfo.DefaultThreadCurrentCulture`**
|
||||
could perturb the entire process's formatting behavior (subtle but real cross-script
|
||||
side effect).
|
||||
|
||||
The original Core.Scripting-001 finding called out the model: when an allow-listed
|
||||
namespace contains dangerous types, those types must be denied type-granularly.
|
||||
The new reference surface introduces several more such types and the deny-list
|
||||
has not been kept in sync.
|
||||
|
||||
**Recommendation:** Add `System.Threading.ThreadPool` and `System.Threading.Timer`
|
||||
to `ForbiddenFullTypeNames`. Add `System.Runtime.Loader` as a namespace prefix
|
||||
to `ForbiddenNamespacePrefixes` (every type in `System.Runtime.Loader` —
|
||||
`AssemblyLoadContext`, `AssemblyDependencyResolver`, `AssemblyLoadEventArgs` — is
|
||||
out of script scope). Consider adding `System.Console` to `ForbiddenFullTypeNames`
|
||||
to stop log-spam through the host's console streams, and at minimum document
|
||||
`CultureInfo.DefaultThreadCurrentCulture` as an accepted cross-script side
|
||||
effect. Each addition must have a regression test in `ScriptSandboxTests`
|
||||
mirroring the Core.Scripting-010 vector style. Update
|
||||
`docs/v2/implementation/phase-7-scripting-and-alarming.md` decision #6 + the
|
||||
"Sandbox escape" compliance-check row to enumerate the additions, per the
|
||||
Core.Scripting-009 doc-sync convention.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — added `System.Runtime.Loader` to
|
||||
`ForbiddenNamespacePrefixes` (the namespace-prefix form preferred over
|
||||
type-granular per the recommendation; future BCL additions to that namespace
|
||||
are denied by default). Added `System.Threading.ThreadPool` and
|
||||
`System.Threading.Timer` to `ForbiddenFullTypeNames` — both live in
|
||||
`System.Threading` shared with allowed sync primitives so they must be
|
||||
type-granular. Regression tests added to `ScriptSandboxTests`:
|
||||
`Rejects_ThreadPool_QueueUserWorkItem_at_compile`,
|
||||
`Rejects_Timer_new_at_compile`, `Rejects_AssemblyLoadContext_at_compile`.
|
||||
`docs/v2/implementation/phase-7-scripting-and-alarming.md` decision #6 +
|
||||
the Sandbox-escape compliance-check row both updated per the
|
||||
Core.Scripting-009 doc-sync convention. The two lower-impact suggestions
|
||||
from the recommendation (`System.Console`, `CultureInfo.DefaultThreadCurrentCulture`)
|
||||
were intentionally not addressed: `Console.SetOut` requires constructing
|
||||
a `System.IO.TextWriter` which is already blocked, leaving only
|
||||
`Console.WriteLine` log-spam (annoyance, not a security threat); and
|
||||
`CultureInfo.DefaultThreadCurrentCulture` is a cross-script side-effect
|
||||
worth knowing about but doesn't escape the sandbox. Recording both as
|
||||
accepted minor risks. Test totals after fix: Core.Scripting 107 green
|
||||
(was 104 — +3 new rejection tests).
|
||||
|
||||
### Core.Scripting-013
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Security |
|
||||
| Location | `ScriptEvaluator.cs:202-225` (`BuildWrapperSource`) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The synthesized wrapper pastes the user's source verbatim
|
||||
between `{` and `}` braces inside a static method body, with a `#line 1`
|
||||
directive and no escaping. The legacy `CSharpScript.CreateDelegate` path was
|
||||
robust to this because Roslyn's scripting compiler parses script source as a
|
||||
top-level statement sequence; the new hand-rolled path is parsing ordinary C# in
|
||||
a method body, so a script that injects matching `{` / `}` braces can extend the
|
||||
synthesized compilation unit with additional methods, classes, or `#line`
|
||||
directives. For example, a script body of
|
||||
`return 0; } public static int Evil() { return 0; }} public static class CompiledScript2 { public static void M() {`
|
||||
ends the `Run` method early, declares a sibling `Evil` method (and even a
|
||||
sibling `CompiledScript2` class) inside the synthesized namespace, then opens an
|
||||
unclosed method that consumes the wrapper's trailing `}\n}`. With matching brace
|
||||
counts the script parses cleanly and compiles.
|
||||
|
||||
`ForbiddenTypeAnalyzer` walks every descendant of every syntax tree, so any
|
||||
forbidden BCL types named inside the injected methods are still caught — the
|
||||
finding is **not** a direct sandbox escape. However:
|
||||
|
||||
- It silently relaxes the operator-visible authoring contract documented in
|
||||
`docs/VirtualTags.md` ("scripts are statement bodies that end with an
|
||||
explicit `return …;`") to "scripts can be any compilable C# inside the
|
||||
`CompiledScript` namespace" — operators have access to features the design
|
||||
did not intend to expose (local types defined as siblings of `Run`, custom
|
||||
module initializers via attributes, etc.).
|
||||
- A script can embed its own `#line` directives that override the
|
||||
`#line 1` we emit just above the user source, producing misleading error
|
||||
locations in compiler diagnostics surfaced to the operator.
|
||||
- Future hardening that relies on syntactic-shape assumptions (e.g.
|
||||
"every script has exactly one method") would silently fail.
|
||||
- It widens the analyzer's surface: the analyzer's correctness now depends on
|
||||
Pass (2) correctly walking every conceivable C# construct that can name a
|
||||
type, including ones a normal script body would never contain
|
||||
(`UnmanagedCallersOnly` attribute, function pointer types `delegate*<...>`,
|
||||
pattern types, switch arm types, …).
|
||||
|
||||
**Recommendation:** Either (a) reject scripts whose parsed body contains
|
||||
declarations other than statements — walk the wrapper's syntax tree after parse
|
||||
and require that the only members of `CompiledScript` are the single `Run`
|
||||
method, raising a `CompilationErrorException` if anything else appears — or
|
||||
(b) parse the user source independently as a `BlockSyntax` and inject the
|
||||
parsed block as the method body via the Roslyn syntax API, which makes
|
||||
brace-mismatched / class-injecting source unparseable. Add a regression test
|
||||
covering at least the brace-injection vector
|
||||
(`return 0; } public static int Evil() { return 0;`).
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — took option (a) from the recommendation:
|
||||
added an `EnforceSingleRunMember` step to `ScriptEvaluator.Compile` (runs after
|
||||
`CSharpSyntaxTree.ParseText` of the synthesized wrapper, before Roslyn
|
||||
compile). The check requires exactly one type declaration in the compilation
|
||||
unit (the `CompiledScript` class) AND exactly one member on that class (the
|
||||
`Run` method). Any deviation — a sibling class, an additional namespace, a
|
||||
sibling method or nested type alongside `Run` — throws
|
||||
`CompilationErrorException` with diagnostic IDs `LMX001` / `LMX002` and a
|
||||
message that names Core.Scripting-013 and points at the offending span. Two
|
||||
regression tests added: `Rejects_sibling_method_injection_via_balanced_braces`
|
||||
(injects a sibling method via `} public static int Evil() { …`) and
|
||||
`Rejects_sibling_class_injection_via_balanced_braces` (injects an entire
|
||||
sibling namespace + class). Option (b) (parse the user source independently
|
||||
as a `BlockSyntax` and inject via Roslyn syntax API) was considered but the
|
||||
parse-and-validate approach is more readable, gives clearer error messages,
|
||||
and keeps the wrapper-source generation textual.
|
||||
|
||||
### Core.Scripting-014
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `CompiledScriptCache.cs:91-103` (`Clear`) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `Clear()` snapshots `_cache.Keys.ToArray()` then iterates,
|
||||
calling `TryRemove(key, out var lazy)` on each — the key-only overload, not
|
||||
the value-scoped one used in `GetOrCompile`'s catch block. Between the
|
||||
snapshot and a given `TryRemove`, a concurrent `GetOrCompile(scriptSource)`
|
||||
call that hashes to the same key can re-insert a fresh `Lazy` whose `.Value`
|
||||
the caller already retained. The unconditional `TryRemove` then removes that
|
||||
fresh `Lazy` and `DisposeLazyIfMaterialised(lazy)` calls `Dispose()` on its
|
||||
evaluator — unloading the ALC while the concurrent caller still holds a
|
||||
reference to the evaluator and intends to invoke it.
|
||||
|
||||
This is exactly the race-window pattern the Core.Scripting-006 resolution
|
||||
fixed in `GetOrCompile`'s catch block (the test
|
||||
`Failed_compile_eviction_does_not_remove_a_concurrent_retry_entry` locks it
|
||||
there). `Clear()` carries the same shape but uses the older, value-blind
|
||||
overload, so the same race that finding-006 addresses is still latent on the
|
||||
publish-replace path.
|
||||
|
||||
In current production wiring `Clear()` is intended for config-publish + tests
|
||||
— neither overlaps steady-state evaluation under the documented design — so
|
||||
the in-practice impact is low. But the cache is checked in as the
|
||||
forward-looking compile cache for the engines (per `Script.SourceHash`'s docs
|
||||
and the cache's own remarks); a future wiring that calls `Clear()` from
|
||||
publish while evaluations are in flight would dispose live evaluators.
|
||||
|
||||
**Recommendation:** Replace the snapshot + `TryRemove(key, out var lazy)`
|
||||
sequence with an enumeration that captures the `Lazy` reference at snapshot
|
||||
time and uses the value-scoped `TryRemove(KeyValuePair<,>)` overload, mirroring
|
||||
the Core.Scripting-006 fix:
|
||||
|
||||
```csharp
|
||||
foreach (var entry in _cache.ToArray())
|
||||
{
|
||||
if (_cache.TryRemove(entry))
|
||||
DisposeLazyIfMaterialised(entry.Value);
|
||||
}
|
||||
```
|
||||
|
||||
Add a regression test that races `GetOrCompile` against `Clear` and asserts
|
||||
the caller's evaluator is still usable.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — applied the recommendation verbatim:
|
||||
replaced `foreach (var key in _cache.Keys.ToArray())` + key-only
|
||||
`TryRemove(key, out var lazy)` with `foreach (var entry in _cache.ToArray())` +
|
||||
value-scoped `TryRemove(entry)` (the `KeyValuePair<,>` overload). A concurrent
|
||||
GetOrCompile re-add between the snapshot and the remove inserts a fresh Lazy
|
||||
under the same key; the value-scoped comparison sees the mismatch and leaves
|
||||
the fresh entry intact (instead of evicting + disposing the live evaluator
|
||||
the concurrent caller still holds). Regression test
|
||||
`Clear_uses_value_scoped_TryRemove_so_a_race_inserted_entry_survives` added
|
||||
to `CompiledScriptCacheTests` — single-threaded simulation that snapshots
|
||||
the dict, mutates the entry to a fresh Lazy mid-flight, drives the same
|
||||
value-scoped TryRemove overload Clear now uses, and asserts the fresh entry
|
||||
survives. The two-thread race would be flaky to model directly; the
|
||||
single-threaded semantic test is sufficient because the fix is the
|
||||
overload-selection itself.
|
||||
|
||||
### Core.Scripting-015
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `ScriptEvaluator.cs:234-270` (`ToCSharpTypeName`) |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ToCSharpTypeName` is documented to handle nested types
|
||||
(`Outer+Inner` → `Outer.Inner`) via `Replace('+', '.')` for the
|
||||
non-generic path (line 269) but the generic path (line 263-266) constructs the
|
||||
name from `def.FullName!` then takes a substring up to the backtick. For a
|
||||
**nested generic** type — e.g. `Outer.Inner<T>` whose `FullName` is
|
||||
`Outer+Inner`1` — `Replace('+', '.')` is applied first, then `Substring(0, IndexOf('`'))`
|
||||
on `"Outer.Inner`1"` produces `"Outer.Inner"`, which is correct. Good.
|
||||
|
||||
However, the generic branch does NOT handle the case where the OPEN generic
|
||||
type itself is nested with `+` inside the parent's name when the parent is
|
||||
also generic (`Outer<TOuter>.Inner<TInner>` — `FullName` is
|
||||
`Outer`1+Inner`1[[TOuter,TInner]]`). For that shape `Substring(0, IndexOf('`'))`
|
||||
truncates at the first backtick — yielding `"Outer.Inner"` — silently dropping
|
||||
the closed type arguments of `Outer<TOuter>`. The resulting source string is
|
||||
syntactically valid but semantically wrong: `global::Outer.Inner<TInner>` does
|
||||
not name `Outer<TOuter>.Inner<TInner>`.
|
||||
|
||||
The production code never hits this shape — `TResult` is always one of
|
||||
`object?`, `bool`, `int`, `double`, `string?`, `DateTime` across the
|
||||
virtual-tag engine, the alarm engine, the test-harness, and the test suite,
|
||||
and `ScriptGlobals<TContext>` is always a top-level generic over a top-level
|
||||
`ScriptContext` subclass. The bug is latent. But it is a foot-gun for a
|
||||
future caller (e.g. a Phase-8 driver that wires a context type defined as a
|
||||
nested generic for grouping reasons) and the XML-doc comment claims
|
||||
"handles nested types" without qualifying it.
|
||||
|
||||
A second smaller correctness gap on the same path: the comment claims
|
||||
`global::`-qualified FQNs prevent accidental capture by the wrapper's `using`
|
||||
directives, which is true for the generic / non-generic branches, but the
|
||||
primitive aliases (`bool`, `int`, `string`, `object`, …) are emitted unqualified.
|
||||
A script that defines a local `class bool` (now possible per Core.Scripting-013)
|
||||
would shadow the alias. Probably benign, but worth a comment.
|
||||
|
||||
**Recommendation:** Add a check in the generic branch that walks the FullName
|
||||
backtick-by-backtick — or use `INamedTypeSymbol`-style name composition from
|
||||
`def.DeclaringType` recursively — so multi-arity-nested generics emit
|
||||
correctly. At minimum update the XML doc to qualify "handles nested types" as
|
||||
"handles single-level nesting; nested generics whose parent is itself generic
|
||||
are not supported". Add a `ToCSharpTypeName` unit test (currently nothing
|
||||
exercises this method directly — coverage relies on the end-to-end compile path,
|
||||
so the bug surfaces only as a misleading Roslyn diagnostic).
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — rewrote the generic-type branch of
|
||||
`ToCSharpTypeName` to walk the `FullName` segment-by-segment (split on `.`
|
||||
after `+ → .` substitution). For each segment ending in `Name\`N`, the
|
||||
algorithm consumes N generic arguments from `t.GetGenericArguments()` in
|
||||
order and emits them as `<…>` on that segment. Nested generic-in-generic
|
||||
shapes (`Outer<T>.Inner<U>`) now emit as
|
||||
`global::Ns.Outer<T>.Inner<U>` (valid C#) rather than the pre-fix
|
||||
`global::Ns.Outer<T, U>` (which dropped the segment boundary entirely
|
||||
because `IndexOf('`')` truncated at the first backtick). No production
|
||||
caller exercises this shape today (all `TContext` / `TResult` types in
|
||||
the codebase are top-level non-nested), so the fix is preemptive — but
|
||||
the algorithm is now correct for any future nested-generic context type.
|
||||
|
||||
### Core.Scripting-016
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:74-117`, `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/ScriptedAlarmEngine.cs:139-182` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The Core.Scripting-008 resolution introduced
|
||||
`ScriptEvaluator.IDisposable` + `CompiledScriptCache.Clear()` that disposes
|
||||
each materialised evaluator before dropping its dictionary entry, so per-publish
|
||||
ALC accretion is no longer process-lifetime rooted **inside the cache**. But
|
||||
neither production consumer of `ScriptEvaluator` uses the cache — both
|
||||
`VirtualTagEngine.Load` and `ScriptedAlarmEngine.LoadAsync` call
|
||||
`ScriptEvaluator<TContext, TResult>.Compile(...)` directly (lines 105 / 160
|
||||
respectively), store the evaluator inside an internal `VirtualTagState` /
|
||||
`AlarmState` record, and on the next `Load` simply call `_tags.Clear()` /
|
||||
`_alarms.Clear()`. The dropped `ScriptEvaluator` references never have
|
||||
`Dispose()` called on them, so the underlying `ScriptAssemblyLoadContext`
|
||||
instances are never `Unload()`-ed. The .NET runtime guarantees that a
|
||||
collectible ALC stays alive until `Unload()` is called explicitly — having
|
||||
"no strong references" is necessary but not sufficient. So the publish-replace
|
||||
cycle leaks every prior generation's emitted assembly exactly as before the
|
||||
fix, even though the fix's infrastructure is in place.
|
||||
|
||||
The Core.Scripting-008 regression tests in `CompiledScriptCacheTests`
|
||||
(`Dispose_unloads_compiled_script_assembly_load_context` /
|
||||
`Clear_disposes_every_materialised_evaluator`) prove the contract on
|
||||
`CompiledScriptCache`, but neither engine uses that class. There is no
|
||||
integration test exercising the actual publish path — i.e. that calling
|
||||
`VirtualTagEngine.Load(...)` twice with different definitions makes the prior
|
||||
generation's ALC eligible for GC. As a result the fix's headline guarantee
|
||||
("Server restarts are no longer required to reclaim compiled-script memory" —
|
||||
`docs/VirtualTags.md`) is not actually delivered to the production engines.
|
||||
|
||||
This is the same observable behavior the original Core.Scripting-008 finding
|
||||
described, surfacing on a different code path that the resolution did not touch.
|
||||
|
||||
**Recommendation:** Either route the engines' compile path through
|
||||
`CompiledScriptCache<TContext, TResult>` (the documented design — the cache
|
||||
already returns the same evaluator instance for identical source, and its
|
||||
`Clear()` now performs the right disposal — and `Script.SourceHash`'s doc-comment
|
||||
already names this as the cache key), or make the engines' `Load` methods
|
||||
dispose the previous `ScriptEvaluator` instances before reassigning. The
|
||||
former is the cleaner change because it also collapses redundant compiles
|
||||
across publishes for unchanged scripts. Add an integration test along the
|
||||
lines of `CompiledScriptCacheTests.Clear_disposes_every_materialised_evaluator`
|
||||
for each engine: snapshot the per-evaluator emitted assembly via
|
||||
`WeakReference`, call `Load(...)` with a different definition set, and assert
|
||||
the prior generation's assemblies become collectable.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — took the cleaner route from the
|
||||
recommendation: routed both engines' compile paths through
|
||||
`CompiledScriptCache<TContext, TResult>`. `VirtualTagEngine` and
|
||||
`ScriptedAlarmEngine` each gained a private `_compileCache` instance field,
|
||||
their `Load`/`LoadAsync` methods now call `_compileCache.GetOrCompile(source)`
|
||||
instead of `ScriptEvaluator.Compile(source)` directly, and the cache is cleared
|
||||
on publish-replace alongside the existing `_tags` / `_alarms` clears so the
|
||||
prior generation's ALCs are disposed before recompile. Engine `Dispose` now
|
||||
also calls `_compileCache.Dispose()` so the engine-shutdown path actually
|
||||
releases the emitted assemblies. **Side-fix:** discovered + fixed an
|
||||
adjacent bug in `CompiledScriptCache.Dispose()` itself — it set
|
||||
`_disposed = true` before calling `Clear()`, but `Clear()`'s pre-existing
|
||||
`if (_disposed) return` guard then aborted the drain unconditionally, so
|
||||
the Dispose-triggered cleanup was a silent no-op. Removed the disposed-guard
|
||||
on `Clear()` (the operation is idempotent — clearing an empty/cleared cache
|
||||
is safe). Without this side-fix the engine-Dispose path would have left
|
||||
the cached evaluators rooted forever even though the call chain looked
|
||||
correct. **Side-fix for ScriptedAlarmEngine.Dispose:** moved the pre-existing
|
||||
"do NOT clear `_alarms` here" comment to "clear `_alarms` AFTER the drain"
|
||||
because the AlarmState records hold the `TimedScriptEvaluator`/`ScriptEvaluator`
|
||||
delegates that root the emitted assembly — leaving them in `_alarms` after
|
||||
Dispose was the same root-the-script-forever pattern this finding is about,
|
||||
just on the engine side rather than the cache side. The `_alarms` clear is
|
||||
safe after the `Task.WhenAll` drain because that drain guarantees no
|
||||
background callback is mid-flight. Regression tests added:
|
||||
`VirtualTagEngineTests.Dispose_unloads_compiled_script_assembly` and
|
||||
`ScriptedAlarmEngineTests.Dispose_unloads_compiled_predicate_assembly` —
|
||||
each uses `WeakReference` + bounded `GC.Collect()` to prove the emitted
|
||||
assembly is reclaimable after `engine.Dispose()`. **Important test pattern
|
||||
detail:** the alarms test originally failed because its helper was
|
||||
`async Task<WeakReference>` — async state machines capture locals as
|
||||
state-struct fields and can keep them alive past the method's apparent end.
|
||||
Rewrote as a synchronous helper using `LoadAsync(...).GetAwaiter().GetResult()`
|
||||
inside two cooperating `[MethodImpl(MethodImplOptions.NoInlining)]` helpers
|
||||
(`CompileAlarmAndCaptureWeak` + `ExtractEmittedAssemblyWeakRef`) so the
|
||||
intermediate reflection locals die when each helper returns. Test totals
|
||||
after fix: Core.Scripting 104 green (unchanged); VirtualTags 57 green (was
|
||||
56 — +1 unload test); ScriptedAlarms 67 green (was 66 — +1 unload test).
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
|---|---|
|
||||
| Module | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Review date | 2026-05-23 |
|
||||
| Commit reviewed | `a9be809` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 2 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -16,7 +16,7 @@ a category produced nothing rather than leaving it blank.
|
||||
|
||||
| # | Category | Result |
|
||||
|---|---|---|
|
||||
| 1 | Correctness & logic bugs | Driver.Cli.Common-001, Driver.Cli.Common-002 |
|
||||
| 1 | Correctness & logic bugs | Driver.Cli.Common-001, Driver.Cli.Common-002, Driver.Cli.Common-007 |
|
||||
| 2 | OtOpcUa conventions | No issues found |
|
||||
| 3 | Concurrency & thread safety | Driver.Cli.Common-003 |
|
||||
| 4 | Error handling & resilience | Driver.Cli.Common-004 |
|
||||
@@ -24,9 +24,47 @@ a category produced nothing rather than leaving it blank.
|
||||
| 6 | Performance & resource management | No issues found |
|
||||
| 7 | Design-document adherence | No issues found |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Driver.Cli.Common-005 |
|
||||
| 9 | Testing coverage | Driver.Cli.Common-005, Driver.Cli.Common-008 |
|
||||
| 10 | Documentation & comments | Driver.Cli.Common-006 |
|
||||
|
||||
## Re-review 2026-05-23 (commit `a9be809`)
|
||||
|
||||
Delta scope: commit `5a9c459` extends the `FormatStatus` shortlist with five
|
||||
`Bad*` codes (`BadInternalError` 0x80020000, `BadNotWritable` 0x803B0000,
|
||||
`BadOutOfRange` 0x803C0000, `BadNotSupported` 0x803D0000, `BadDeviceFailure`
|
||||
0x80550000) the FOCAS / AbCip / AbLegacy native-protocol mappers emit. Tests
|
||||
extended with parallel `[InlineData]` rows on the well-known Theory plus a new
|
||||
`FormatStatus_names_native_driver_emitted_codes` Theory.
|
||||
|
||||
Cross-checked the five new hex literals against the OPC Foundation
|
||||
`Opc.Ua.StatusCodes` table via DeepWiki:
|
||||
|
||||
| Name added | Code in shortlist | Spec value | Verdict |
|
||||
|---|---|---|---|
|
||||
| `BadInternalError` | `0x80020000` | `0x80020000` | Correct |
|
||||
| `BadNotWritable` | `0x803B0000` | `0x803B0000` | Correct |
|
||||
| `BadOutOfRange` | `0x803C0000` | `0x803C0000` | Correct |
|
||||
| `BadNotSupported` | `0x803D0000` | `0x803D0000` | Correct |
|
||||
| `BadDeviceFailure` | `0x80550000` | **`0x808B0000`** | **WRONG — `0x80550000` is `BadSecurityPolicyRejected`** |
|
||||
|
||||
The `BadDeviceFailure` mismapping is the same shape of bug as the original
|
||||
Driver.Cli.Common-001 (wrong hex literal copied into the shortlist); recorded
|
||||
as Driver.Cli.Common-007. The wrong constant also lives in
|
||||
`FocasStatusMapper.cs`, `AbCipStatusMapper.cs`, `AbLegacyStatusMapper.cs`,
|
||||
`TwinCATStatusMapper.cs`, `S7Driver.cs`, and `ModbusDriver.cs` — those are in
|
||||
other modules' review scope but are noted here so future re-reviewers know
|
||||
this isn't isolated. (`StatusCodeMap.cs` in Driver.Galaxy + the Wonderware
|
||||
historian mappers use the correct `0x808B0000`, confirming the discrepancy.)
|
||||
|
||||
Testing observation: the new `FormatStatus_names_native_driver_emitted_codes`
|
||||
Theory is fully redundant with the well-known Theory (the five rows were also
|
||||
added there in the same commit) and uses `ShouldContain` rather than
|
||||
`ShouldBe` — recorded as Driver.Cli.Common-008.
|
||||
|
||||
Other categories (concurrency, security, performance, design-doc adherence,
|
||||
code organisation, documentation) are unchanged by this delta — no new
|
||||
issues found.
|
||||
|
||||
## Findings
|
||||
|
||||
### Driver.Cli.Common-001
|
||||
@@ -130,7 +168,7 @@ dispose the previous logger if reconfiguration is genuinely intended.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:68-70` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `FormatTable` calls `rows.Max(r => r.Tag.Length)` (and the same for the
|
||||
value and status columns) without guarding against empty input. When `tagNames` and
|
||||
@@ -143,7 +181,13 @@ instead of producing an empty (header-only) table.
|
||||
separator, or an explicit "no rows" line), or use `DefaultIfEmpty(0).Max(...)` for the
|
||||
width computations.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — `FormatTable` guards each `rows.Max(...)` width
|
||||
computation with a `rows.Length == 0 ? "<HEADER>".Length : Math.Max(...)` ternary, so
|
||||
an empty batch read returns the header + separator rows (no data rows) instead of
|
||||
throwing `InvalidOperationException`. The fix was landed in commit `1433a1c` alongside
|
||||
the -002 work, and the regression test
|
||||
`SnapshotFormatterTests.FormatTable_with_empty_input_returns_header_only` (added under
|
||||
-005) exercises it.
|
||||
|
||||
### Driver.Cli.Common-005
|
||||
|
||||
@@ -178,7 +222,7 @@ empty-input and `DriverCommandBase` level-selection tests.
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:71`, `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/DriverCommandBase.cs:9` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Two minor doc inaccuracies. (1) The comment at `SnapshotFormatter.cs:71`
|
||||
states the "source-time column is fixed-width (ISO-8601 to ms) so no max-measurement
|
||||
@@ -194,4 +238,127 @@ library. The XML doc is stale relative to the shipped driver-CLI set.
|
||||
right-most and intentionally unpadded rather than claiming fixed width. Add FOCAS to the
|
||||
`DriverCommandBase` class-summary driver list.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — (1) `SnapshotFormatter.cs:71` comment reworded
|
||||
to state the source-time column is the right-most one and intentionally not
|
||||
measured/padded, calling out the null-timestamp `"-"` case explicitly. (2) FOCAS was
|
||||
added to the `DriverCommandBase` class-summary driver enumeration in commit `7ff356b`
|
||||
(landed alongside the -003 work).
|
||||
|
||||
### Driver.Cli.Common-007
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | High |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:129` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Commit `5a9c459` added `0x80550000u => "BadDeviceFailure"` to the
|
||||
`FormatStatus` shortlist, but `0x80550000` is the canonical OPC UA spec value for
|
||||
`BadSecurityPolicyRejected`, not `BadDeviceFailure`. The correct spec value for
|
||||
`BadDeviceFailure` is `0x808B0000` (verified against the OPC Foundation
|
||||
`Opc.Ua.StatusCodes` table via DeepWiki; corroborated locally by
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/StatusCodeMap.cs:40`
|
||||
(`BadDeviceFailure = 0x808B0000u`) and the two Wonderware historian quality mappers,
|
||||
which all hand-pin the correct value).
|
||||
|
||||
This is the same shape of bug Driver.Cli.Common-001 closed: a wrong hex literal
|
||||
in the shortlist that the test theory (`SnapshotFormatterTests.cs:42`) blindly
|
||||
asserts against the same wrong value, so the bug is invisible to CI.
|
||||
|
||||
Practical impact, two-sided:
|
||||
1. A driver that returns the real spec `BadDeviceFailure` (`0x808B0000`) — e.g.
|
||||
the `Driver.Galaxy.StatusCodeMap` path on a deploy-time device fault — falls
|
||||
through the named shortlist entirely. Since Driver.Cli.Common-002 added the
|
||||
severity-class fallback, it now renders as `0x808B0000 (Bad)` instead of
|
||||
`0x808B0000 (BadDeviceFailure)` — operators lose the specific class label
|
||||
`docs/Driver.FOCAS.Cli.md:153` tells them to read off the output.
|
||||
2. A driver that returns `0x80550000` (which `FocasStatusMapper`, `AbCipStatusMapper`,
|
||||
`AbLegacyStatusMapper`, `TwinCATStatusMapper`, `S7Driver`, and `ModbusDriver` all
|
||||
misuse as "BadDeviceFailure") now renders as `0x80550000 (BadDeviceFailure)` —
|
||||
matching driver intent but contradicting the OPC UA spec, which says any client
|
||||
that decodes the same payload using the OPC Foundation stack will see
|
||||
`BadSecurityPolicyRejected`. A security-monitoring tool keying on
|
||||
`BadSecurityPolicyRejected` will fire on a CPU fault, while real
|
||||
`BadSecurityPolicyRejected` returns from the secure-channel layer would be
|
||||
mislabelled as a device fault. Operator-facing CLI output and machine-readable
|
||||
status semantics disagree.
|
||||
|
||||
The deeper bug is the wrong constant in the native-protocol mappers (out of scope
|
||||
for this module), but the `SnapshotFormatter` shortlist is its own
|
||||
spec-authoritative reference point — Driver.Cli.Common-001 explicitly framed the
|
||||
shortlist as canonical, with the in-line "keep [these literals] in sync with [the
|
||||
Opc.Ua.StatusCodes] table" comment at `SnapshotFormatter.cs:112-113`. That
|
||||
contract is now broken.
|
||||
|
||||
**Recommendation:** Change line 129 to `0x808B0000u => "BadDeviceFailure"`. Update
|
||||
the matching `[InlineData]` rows in `SnapshotFormatterTests.cs` (line 42 in the
|
||||
well-known Theory; line 60 in the redundant Theory — see Driver.Cli.Common-008).
|
||||
Also note in the resolution that the native-protocol mappers (FOCAS / AbCip /
|
||||
AbLegacy / TwinCAT / S7 / Modbus) need the same fix recorded against their own
|
||||
module reviews — the constant `0x80550000` should be replaced with `0x808B0000`
|
||||
everywhere it claims to mean `BadDeviceFailure`. Consider Driver.Cli.Common-001's
|
||||
original recommendation again: add a CI test that cross-checks every shortlist
|
||||
entry against `Opc.Ua.StatusCodes` reflection so this class of bug stops
|
||||
recurring.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — corrected `SnapshotFormatter.FormatStatus`
|
||||
to map `0x808B0000u => "BadDeviceFailure"` (was `0x80550000u`). Updated the
|
||||
`InlineData` row in the well-known Theory accordingly; the redundant native-
|
||||
emitted Theory was deleted entirely per Driver.Cli.Common-008. Added a regression
|
||||
row to `FormatStatus_does_not_apply_pre_fix_wrong_names` pinning that
|
||||
`0x80550000` no longer renders as `BadDeviceFailure` (mirroring the
|
||||
Driver.Cli.Common-001 wrong-name guards). The underlying constant was also
|
||||
corrected in all six native-protocol mappers as part of the same commit:
|
||||
`FocasStatusMapper.BadDeviceFailure`, `AbCipStatusMapper.BadDeviceFailure`,
|
||||
`AbLegacyStatusMapper.BadDeviceFailure`, `TwinCATStatusMapper.BadDeviceFailure`,
|
||||
`ModbusDriver.StatusBadDeviceFailure`, `S7Driver.StatusBadDeviceFailure` — all
|
||||
moved from `0x80550000u` to `0x808B0000u`. The three downstream Modbus tests
|
||||
(`ModbusExceptionMapperTests` 3 InlineData rows + 1 ShouldBe assertion;
|
||||
`ExceptionInjectionTests.StatusBadDeviceFailure` constant) updated to expect
|
||||
the corrected code. **Behavior change:** OPC UA clients consuming the native
|
||||
drivers now see the canonical `BadDeviceFailure` (0x808B0000) instead of the
|
||||
misnamed `BadSecurityPolicyRejected` (0x80550000) on device-fault paths —
|
||||
operator-facing CLI output and machine-readable status semantics now agree.
|
||||
Suite totals after fix: Driver.Cli.Common.Tests 43 green (was 48 — minus 5
|
||||
redundant rows); Modbus.Tests 263; AbCip.Tests 262; AbLegacy.Tests 157;
|
||||
FOCAS.Tests 178; S7.Tests 112; TwinCAT.Tests 131; all green. The Opc.Ua.StatusCodes
|
||||
cross-check the recommendation suggested is recorded as a follow-up worth
|
||||
considering but is out of scope for this fix.
|
||||
|
||||
### Driver.Cli.Common-008
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests/SnapshotFormatterTests.cs:50-64` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Commit `5a9c459` adds a new
|
||||
`FormatStatus_names_native_driver_emitted_codes` `[Theory]` whose five
|
||||
`[InlineData]` rows are identical to five rows added to the existing
|
||||
`FormatStatus_names_well_known_status_codes` `[Theory]` in the same commit
|
||||
(lines 32, 39, 40, 41, 42). The new Theory therefore adds no coverage. It is
|
||||
also weaker than the Theory it duplicates: it asserts
|
||||
`output.ShouldContain($"({expectedName})")` (substring match) where the
|
||||
well-known Theory asserts `output.ShouldBe($"0x{status:X8} ({expectedName})")`
|
||||
(exact match including the hex prefix). The substring form would not catch a
|
||||
regression where the hex literal renders wrong but the name is correct.
|
||||
|
||||
This is not a correctness problem — both Theories pass — but it's a
|
||||
copy-paste inconsistency that costs maintainer attention every time someone
|
||||
reads the test file and wonders which Theory is authoritative.
|
||||
|
||||
**Recommendation:** Either (a) delete the new Theory entirely — its five rows
|
||||
are already covered by the well-known Theory in the same commit — or (b) keep
|
||||
it but switch to `ShouldBe($"0x{status:X8} ({expectedName})")` so its
|
||||
assertion strength matches the rest of the file. Option (a) is cleaner: the
|
||||
commit's "operator workflow" intent is documented well enough in the
|
||||
well-known Theory comment block; the redundant Theory is dead weight.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — took option (a): deleted the
|
||||
`FormatStatus_names_native_driver_emitted_codes` Theory entirely. Its five
|
||||
`InlineData` rows are covered by the well-known Theory's `ShouldBe` (strict
|
||||
exact-match assertion), which is the authoritative shortlist test. Landed
|
||||
alongside the Driver.Cli.Common-007 fix in the same commit.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -45,7 +45,7 @@ a category produced nothing rather than leaving it blank.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `Commands/WriteCommand.cs:58-68` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `WriteCommand.ParseValue` parses the numeric `--value` types
|
||||
(`Byte`/`Int16`/`Int32`/`Float32`/`Float64`) with `sbyte.Parse` / `short.Parse`
|
||||
@@ -65,7 +65,16 @@ literal — consistent with how `ParseBool` already handles bad boolean input.
|
||||
The same pattern exists in the sibling S7 CLI; a shared helper in
|
||||
`Driver.Cli.Common` would fix both.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — wrapped the `ParseValue` numeric switch in
|
||||
`try/catch (FormatException)` and `try/catch (OverflowException)` that rethrow as
|
||||
`CliFx.Exceptions.CommandException` with a message naming the `--type` and the
|
||||
offending value, mirroring the friendly text the `Bit` path already produced.
|
||||
Added `WriteCommandParseValueTests` with [Theory] cases covering non-numeric
|
||||
input across `Byte`/`Int16`/`Int32`/`Float32`/`Float64`, overflow edges
|
||||
(sbyte ±1, short max+1, > int.MaxValue), and an assertion that the exception
|
||||
message names both the type and the offending value. A shared `Driver.Cli.Common`
|
||||
helper is the cleaner long-term fix (cross-CLI duplication remains) but is left
|
||||
to the Driver.Cli.Common review per this module's edit scope.
|
||||
|
||||
### Driver.FOCAS.Cli-002
|
||||
|
||||
@@ -74,7 +83,7 @@ The same pattern exists in the sibling S7 CLI; a shared helper in
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `Commands/SubscribeCommand.cs:45-51` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `subscribe` command attaches an `OnDataChange` handler that
|
||||
calls the synchronous `console.Output.WriteLine`. `OnDataChange` is raised from
|
||||
@@ -93,7 +102,15 @@ console writes with a lock shared between the banner and the handler. Optionally
|
||||
detach the handler in the `finally` block before `ShutdownAsync` for symmetry
|
||||
with the `handle` teardown already present there.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — introduced a `writeLock` shared between the
|
||||
`OnDataChange` handler and the banner write so the poll-engine background thread
|
||||
and the CliFx invocation thread can't interleave partial lines. Added an
|
||||
explanatory comment above the handler explaining the CliFx-`IConsole` rationale
|
||||
and the synchronous-on-background-thread design — mirroring the Modbus / S7
|
||||
copies of this command. Also added a try/catch around the handler body so a
|
||||
transient stdout error cannot tear down the poll loop, and Serilog-warn-logs the
|
||||
swallowed exception. Added `SubscribeCommandConsoleHandlerTests` to guard the
|
||||
`writeLock` + CliFx-`IConsole` rationale against future copy-paste regressions.
|
||||
|
||||
### Driver.FOCAS.Cli-003
|
||||
|
||||
@@ -102,7 +119,7 @@ with the `handle` teardown already present there.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `FocasCommandBase.cs:19` (`CncPort`), `FocasCommandBase.cs:27` (`TimeoutMs`), `Commands/SubscribeCommand.cs:23` (`IntervalMs`) |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The numeric command options `--cnc-port`, `--timeout-ms`, and
|
||||
`--interval-ms` are accepted without range validation. A zero or negative
|
||||
@@ -120,7 +137,17 @@ timeout and interval strictly positive. The same gap exists across the sibling
|
||||
driver CLIs, so a shared validation helper in `Driver.Cli.Common` is the
|
||||
cleaner fix.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — added a protected `ValidateOptions(int?
|
||||
intervalMs = null)` helper on `FocasCommandBase` that rejects `--cnc-port`
|
||||
outside `1..65535`, non-positive `--timeout-ms`, and non-positive
|
||||
`--interval-ms` (when the caller passes one) with a `CliFx.Exceptions.CommandException`
|
||||
naming the option and the rejected value. `ProbeCommand` / `ReadCommand` /
|
||||
`WriteCommand` call `ValidateOptions()` without an interval, `SubscribeCommand`
|
||||
calls `ValidateOptions(IntervalMs)`. Added `FocasCommandBaseValidationTests`
|
||||
covering accept-defaults, reject out-of-range port (0, -1, 65536), reject
|
||||
non-positive timeout / interval, and skip-interval-when-omitted. A shared
|
||||
helper in `Driver.Cli.Common` is the cleaner cross-CLI fix and is recorded
|
||||
against that module's review.
|
||||
|
||||
### Driver.FOCAS.Cli-004
|
||||
|
||||
@@ -129,7 +156,7 @@ cleaner fix.
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `Commands/ProbeCommand.cs:37,54`; `Commands/ReadCommand.cs:37,46`; `Commands/WriteCommand.cs:45,54`; `Commands/SubscribeCommand.cs:39,73` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Every command declares `await using var driver = new FocasDriver(...)`
|
||||
**and** explicitly calls `await driver.ShutdownAsync(CancellationToken.None)` in
|
||||
@@ -144,7 +171,14 @@ dead weight and obscures intent: a reader cannot tell whether the explicit
|
||||
and rely on `await using` for disposal, or drop `await using` and keep the
|
||||
explicit teardown — but not both. The same redundancy exists in the sibling CLIs.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — dropped the explicit
|
||||
`await driver.ShutdownAsync(CancellationToken.None)` calls from the `finally`
|
||||
blocks of `ProbeCommand`, `ReadCommand`, `WriteCommand`, and `SubscribeCommand`;
|
||||
`await using` is now the sole driver-disposal mechanism per command
|
||||
(`FocasDriver.DisposeAsync` itself runs `ShutdownAsync`). The subscribe command
|
||||
keeps `UnsubscribeAsync` in its finally because that is a subscription-lifecycle
|
||||
concern, not driver disposal. Added `CommandDisposalConventionsTests` to guard
|
||||
the source-level convention against regression.
|
||||
|
||||
### Driver.FOCAS.Cli-005
|
||||
|
||||
@@ -153,7 +187,7 @@ explicit teardown — but not both. The same redundancy exists in the sibling CL
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `Commands/WriteCommand.cs:50`, `Commands/ProbeCommand.cs:50` (via `SnapshotFormatter.FormatStatus`) |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `docs/Driver.FOCAS.Cli.md` documents `BadDeviceFailure` and
|
||||
`BadCommunicationError` as the key diagnostic signals an operator reads off
|
||||
@@ -180,4 +214,18 @@ actually emit — at minimum `BadNotWritable`, `BadOutOfRange`, `BadNotSupported
|
||||
because the gap defeats this module documented `probe`/`write` diagnostic
|
||||
workflow; cross-reference the `Driver.Cli.Common` review.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — the cross-CLI fix landed in `Driver.Cli.Common`:
|
||||
`SnapshotFormatter.FormatStatus` now names `BadInternalError` (0x80020000),
|
||||
`BadNotWritable` (0x803B0000), `BadOutOfRange` (0x803C0000), `BadNotSupported`
|
||||
(0x803D0000), and `BadDeviceFailure` (0x80550000) — the five codes the FOCAS /
|
||||
AbCip / AbLegacy native-protocol mappers all emit but the shortlist previously
|
||||
left unnamed (the canonical `BadTimeout` 0x800A0000 was already added under
|
||||
Driver.Cli.Common-001). FOCAS `probe` / `write` against a non-writable parameter,
|
||||
out-of-range address, unsupported function, busy device, or CNC-handle failure
|
||||
now renders with the named status the `docs/Driver.FOCAS.Cli.md` workflow
|
||||
promises, restoring parity between the docs and the shipped behaviour. Regression
|
||||
`[Theory]` `FormatStatus_names_native_driver_emitted_codes` added to
|
||||
`SnapshotFormatterTests` so the five names can't silently drop out of the
|
||||
shortlist again; the existing well-known shortlist `[Theory]` was extended with
|
||||
the same five entries to enforce the exact `0x... (Name)` rendering. Suite now
|
||||
47 green (was 42).
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|---|---|
|
||||
| Module | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy` |
|
||||
| Reviewer | Claude Code |
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Review date | 2026-05-23 |
|
||||
| Commit reviewed | `a9be809` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 0 |
|
||||
|
||||
@@ -17,12 +17,39 @@
|
||||
| 2 | OtOpcUa conventions | Driver.Galaxy-005 |
|
||||
| 3 | Concurrency & thread safety | Driver.Galaxy-006, Driver.Galaxy-007 |
|
||||
| 4 | Error handling & resilience | Driver.Galaxy-001, Driver.Galaxy-008, Driver.Galaxy-009 |
|
||||
| 5 | Security | Driver.Galaxy-010 |
|
||||
| 6 | Performance & resource management | Driver.Galaxy-011, Driver.Galaxy-012 |
|
||||
| 7 | Design-document adherence | Driver.Galaxy-013 |
|
||||
| 5 | Security | Driver.Galaxy-010, Driver.Galaxy-015 |
|
||||
| 6 | Performance & resource management | Driver.Galaxy-011, Driver.Galaxy-012, Driver.Galaxy-016 |
|
||||
| 7 | Design-document adherence | Driver.Galaxy-013, Driver.Galaxy-017 |
|
||||
| 8 | Code organization & conventions | No issues found |
|
||||
| 9 | Testing coverage | Driver.Galaxy-014 |
|
||||
| 10 | Documentation & comments | Driver.Galaxy-005, Driver.Galaxy-013 |
|
||||
| 10 | Documentation & comments | Driver.Galaxy-005, Driver.Galaxy-013, Driver.Galaxy-018 |
|
||||
|
||||
## Re-review 2026-05-23 (commit `a9be809`)
|
||||
|
||||
The only code-affecting change since `76d35d1` was commit `994997b` — the
|
||||
sibling `mxaccessgw` repo restructured (the `clients/dotnet/MxGateway.Client`
|
||||
project path and the `MxGateway.Contracts.Proto` namespace both moved), and
|
||||
the driver's path-based `ProjectReference` started producing 87 build errors
|
||||
solution-wide. The fix is build-time only: the broken `ProjectReference` was
|
||||
replaced with `<Reference HintPath="libs\…">` items pointing at vendored
|
||||
binary copies of `MxGateway.Client.dll` (99 KB, May 2026 known-good build)
|
||||
and `MxGateway.Contracts.dll` (490 KB), and five `PackageReference`s that
|
||||
the dropped project was previously providing transitively (`Google.Protobuf`,
|
||||
`Grpc.Core.Api`, `Grpc.Net.Client`, `Microsoft.Extensions.Logging.Abstractions`,
|
||||
`Polly`) were declared explicitly. The matching `Tests` csproj got the same
|
||||
binary `<Reference>` for `MxGateway.Contracts` (replacing its own broken
|
||||
`ProjectReference`). A `libs/README.md` documents what is vendored and the
|
||||
two unwinding paths (sibling restores a client library, or driver migrates
|
||||
to the new `ZB.MOM.WW.MxGateway.Contracts.Proto` namespace + reimplements
|
||||
the `MxGatewayClient` / `MxGatewaySession` / `GalaxyRepositoryClient`
|
||||
wrapper, ~2,200 LoC).
|
||||
|
||||
No `*.cs` file changed; the re-review walked only the categories that apply
|
||||
to a build-time/packaging change. Categories with no new findings:
|
||||
Correctness (1), OtOpcUa conventions (2), Concurrency (3), Error handling
|
||||
(4), Code organization (8), Testing coverage (9). Four new findings are
|
||||
recorded below (Driver.Galaxy-015..018) — none Critical, none High; two
|
||||
Medium, two Low.
|
||||
|
||||
## Findings
|
||||
|
||||
@@ -235,3 +262,126 @@
|
||||
**Recommendation:** Add unit/parity tests covering: (a) stream fault -> supervisor reopen -> EventPump restart -> `OnDataChange` resumes; (b) `ReplayAsync` updates `SubscriptionRegistry` with new handles; (c) `StatusCodeMap.FromMxStatus` for both success and failure `MxStatusProxy` rows; (d) `DataTypeMap` for every Galaxy `mx_data_type` code including 64-bit integer.
|
||||
|
||||
**Resolution:** Resolved 2026-05-22 — added `GalaxyDriverInfrastructureTests` covering `GetMemoryFootprint` (Driver.Galaxy-011) and `IAsyncDisposable` (Driver.Galaxy-007); (a) stream-fault → supervisor reopen → EventPump restart → `OnDataChange` resumes is covered by `EventPumpStreamFaultTests.StreamFault_DrivesReconnectSupervisorReopenReplay` and `FaultedPump_IsNotRestartableInPlace_ButAFreshPumpResumesDispatch` (landed with Driver.Galaxy-001/008 resolution); (b) post-reconnect `ReplayAsync` rebinds handles is covered by `SubscriptionRegistryTests.Rebind_*` suite; (c) `StatusCodeMap.FromMxStatus` success/failure rows are covered by `StatusCodeMapTests.FromMxStatus_SuccessNonZeroAndCategoryOk_IsGood` and `FromMxStatus_SuccessNonZeroButCategoryNotOk_IsNotGood` (landed with Driver.Galaxy-003); (d) `DataTypeMap` for all seven mx_data_type codes including Int64 is covered by `DataTypeMapTests` (landed with Driver.Galaxy-002).
|
||||
|
||||
### Driver.Galaxy-015
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | ~~Medium~~ Low (re-triaged 2026-05-23) |
|
||||
| Category | ~~Security~~ Documentation & comments (re-triaged 2026-05-23) |
|
||||
| Location | `libs/MxGateway.Client.dll`, `libs/MxGateway.Contracts.dll`, `libs/README.md` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Commit `994997b` checks in two binary DLLs (`MxGateway.Client.dll`, 99 840 bytes; `MxGateway.Contracts.dll`, 489 984 bytes) under `src/Drivers/.../Driver.Galaxy/libs/` and references them via `<Reference HintPath="…" />`. These are the only checked-in binary build artefacts in the entire repo (a repo-wide `find` for non-`bin/`/`obj/` `*.dll` under `libs/` returns only these two), so the change sets a precedent. The accompanying `libs/README.md` states the DLLs are "byte-for-byte the build output" of the OtOpcUa team's own code against the gateway's open proto contracts, but there is no recorded provenance — no source-commit SHA from the sibling `mxaccessgw` repo that produced the build, no SHA-256/SHA-512 checksum, no `.gitattributes` rule marking these paths as binary (so a future churn-in-place will balloon the pack file). Without a recorded source commit + checksum it is impossible for a future reviewer/auditor to verify the binaries match a specific revision of the sibling repo — the assertion "we built them, not external" is unverifiable after the fact. Tampering or accidental swap (e.g. someone drops in a different DLL of the same name under the same path) would not be detectable.
|
||||
|
||||
**Recommendation:** (a) Pin the source provenance: add the sibling `mxaccessgw` commit SHA used to build each DLL to `libs/README.md`. (b) Record a SHA-256 of each `.dll` in `libs/README.md` so a future tamper or accidental update is detectable by running `Get-FileHash`/`sha256sum`. (c) Add a `.gitattributes` rule under `libs/` declaring `*.dll binary` (and consider `filter=lfs diff=lfs merge=lfs -text` if/when these need to be updated, to avoid bloating the pack file on every refresh). (d) Optional: a `dotnet test` time-check that compares the on-disk hash to the recorded hash, so a CI run notices if the file drifts from what the README claims.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23. **Severity re-triage:** the original
|
||||
finding framed this as a security concern about "tampering or accidental
|
||||
swap by an unknown third party"; the user clarified that the DLLs are
|
||||
their own code, built from their own `mxaccessgw` project — not third-party
|
||||
binaries. That moves the concern from security (untrusted provenance) to
|
||||
documentation (audit trail). Re-classified as Low Documentation &
|
||||
Comments. Fix: `libs/README.md` now carries a Provenance section that
|
||||
records the source-commit SHA (`dd7ca1634e2d2b8a866c81f0009bf87ee9427750`,
|
||||
extracted from the `AssemblyInformationalVersion` baked into both DLLs by
|
||||
the original build) and SHA-256 checksums of both binaries, plus a
|
||||
re-verification recipe (`sha256sum libs/*.dll` + `ilspycmd <dll> | grep
|
||||
AssemblyInformationalVersion`). Recommendations (c) `.gitattributes` and
|
||||
(d) CI hash-check deferred — the DLLs are essentially frozen until one
|
||||
of the two unwinding paths is taken, so adding LFS or a CI guard would
|
||||
add infrastructure that the unwinding step would then have to remove.
|
||||
Re-open if the vendoring becomes a recurring update target.
|
||||
|
||||
### Driver.Galaxy-016
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Medium |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj:43-47`, `libs/README.md:32-37` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The five new `PackageReference` versions declared in the csproj (`Google.Protobuf` 3.34.1, `Grpc.Core.Api` 2.76.0, `Grpc.Net.Client` 2.71.0, `Microsoft.Extensions.Logging.Abstractions` 10.0.0, `Polly` 8.5.2) do not all match what the vendored `MxGateway.Client.dll` was built against. The DLL's PE metadata (extracted via `System.Reflection.Metadata`) shows references to `Grpc.Net.Client v2.0.0.0`, `Microsoft.Extensions.Logging.Abstractions v10.0.0.0`, and notably `Polly.Core v8.0.0.0` — and the source csproj just before the sibling-repo rename (commit `bd4a09a` from 2026-04-27) declared `Grpc.Net.Client` 2.76.0, `Microsoft.Extensions.Logging.Abstractions` 10.0.7, and `Polly.Core` 8.6.6 — *not* the meta-package `Polly`. Our driver pulls `Polly` 8.5.2 (which transitively pins `Polly.Core` 8.5.2 per its nuspec dependency), so the vendored client actually loads `Polly.Core` 8.5.2 at runtime against code compiled against 8.6.6. Across an 8.5 ↔ 8.6 minor delta this is usually safe (assembly-version is `v8.0.0.0` for both), but it is exactly the skew shape that surfaces as `MissingMethodException` if a 8.6-only API was used in the client. `libs/README.md` claims "versions match what the sibling repo's `ZB.MOM.WW.MxGateway.Contracts.csproj` uses so the gRPC + proto runtime stays binary-compatible" — that statement is correct only for `Google.Protobuf` and `Grpc.Core.Api`; the other three packages do not match.
|
||||
|
||||
**Recommendation:** Reconcile the declared package versions with what the vendored DLLs were built against — bump to `Grpc.Net.Client` 2.76.0, `Microsoft.Extensions.Logging.Abstractions` 10.0.7, swap `Polly` for `Polly.Core` 8.6.6 (the driver does not import the `Polly` legacy v7 surface, only Polly.Core via the client). Alternatively, rebuild the vendored DLLs against the same versions the csproj declares and refresh the binaries. Update `libs/README.md` to record the exact versions the DLLs were built against, so the next vendoring refresh has an authoritative reference.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — took the first option (reconcile
|
||||
declared packages with what the DLL was built against, verified by
|
||||
reflecting `Assembly.GetReferencedAssemblies()` on `MxGateway.Client.dll`).
|
||||
Changes to the csproj: **`Polly` 8.5.2 → `Polly.Core` 8.6.6** (the most
|
||||
consequential — `Polly` (v7 fluent API) and `Polly.Core` (v8 resilience-
|
||||
pipeline API) are different packages, and the DLL was built against
|
||||
`Polly.Core`; the prior `Polly` reference would have failed at runtime
|
||||
with `MissingMethodException` the first time the gateway client's retry
|
||||
pipeline ran). Also bumped `Grpc.Net.Client` 2.71.0 → 2.76.0 and
|
||||
`Microsoft.Extensions.Logging.Abstractions` 10.0.0 → 10.0.7 to match the
|
||||
sibling Server/Worker projects' current versions. `Google.Protobuf`
|
||||
3.34.1 and `Grpc.Core.Api` 2.76.0 already matched; left unchanged.
|
||||
`libs/README.md` rewritten to record what was actually verified
|
||||
(`Assembly.GetReferencedAssemblies()` output + the resolved package
|
||||
versions, including the sibling Server/Worker csproj as the version
|
||||
source-of-truth — the deleted MxGateway.Client.csproj would have been
|
||||
the original source but no longer exists). Verification: solution-wide
|
||||
`dotnet build` clean, Driver.Galaxy.Tests 245/245 pass against the
|
||||
corrected package set.
|
||||
|
||||
### Driver.Galaxy-017
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/` (no source change), gateway proto contract |
|
||||
| Status | Deferred |
|
||||
|
||||
**Description:** The vendored `MxGateway.Contracts.dll` only carries the OLD `MxGateway.Contracts.Proto[.Galaxy]` namespace (PE-namespace dump confirms — `MxGateway.Client`, `MxGateway.Contracts`, `MxGateway.Contracts.Proto`, `MxGateway.Contracts.Proto.Galaxy` only). The sibling `mxaccessgw` repo's live `Protos/mxaccess_gateway.proto`, `mxaccess_worker.proto`, and `galaxy_repository.proto` files now generate into `ZB.MOM.WW.MxGateway.Contracts.Proto.*`. The proto wire format itself can still evolve (new RPCs, renamed fields, removed fields) and the driver has no contract-version handshake (a repo-wide search for `ContractVersion|ProtocolVersion|ApiVersion|WireVersion` in the driver returns nothing) — so a gateway service that evolves its proto past what the vendored client knows will fail silently at runtime: gRPC `UNIMPLEMENTED` for a renamed RPC, default-value reads for a removed scalar field, or worse, a wire-tag collision if a field number is reused. The risk surface grew with vendoring: previously the `ProjectReference` would have hard-failed at build time if the proto changed shape; now the driver builds green against a frozen contract that may not match the running gateway.
|
||||
|
||||
**Recommendation:** (a) Add a single `Ping`/`GetVersion` RPC call at gateway-session open, comparing the gateway's reported contract version against a string baked into `libs/README.md` (or a `GatewayContractVersion` const) and refusing the session on mismatch with a clear log. (b) Document in `libs/README.md` the exact mxaccessgw commit SHA (and proto-file SHA-256s) the vendored DLLs were built from, so a parity-rig operator can grep the live gateway for the matching commit. (c) Add a soak/parity test that asserts the live gateway's proto descriptor still matches what the vendored DLL expects — fail loud rather than degrade.
|
||||
|
||||
**Resolution:** Deferred 2026-05-23 — the recommendation's part (b)
|
||||
(record the mxaccessgw source-commit SHA in `libs/README.md`) is satisfied
|
||||
by the Driver.Galaxy-015 resolution, which records both DLLs were built
|
||||
from mxaccessgw commit `dd7ca1634e2d2b8a866c81f0009bf87ee9427750`. Parts
|
||||
(a) and (c) — adding a `GetVersion` RPC at session-open and a parity
|
||||
test against the live gateway's proto descriptor — are substantial new
|
||||
RPC + plumbing work that is not in scope for this code-review-resolution
|
||||
sweep. The risk surface is bounded because either of the two unwinding
|
||||
paths in `libs/README.md` (sibling repo restores `MxGateway.Client.csproj`,
|
||||
or this driver migrates to the new namespace) will move the codebase
|
||||
past the vendoring + close this concern naturally. Re-open if neither
|
||||
unwinding path is taken within the next quarter and the live gateway
|
||||
service does evolve its proto under the driver.
|
||||
|
||||
### Driver.Galaxy-018
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `libs/README.md:32-37`, `ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj:40-47` |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Several small documentation issues in the vendoring artefacts:
|
||||
1. `libs/README.md` says "Versions match what the sibling repo's `ZB.MOM.WW.MxGateway.Contracts.csproj` uses" — but `ZB.MOM.WW.MxGateway.Contracts.csproj` only declares `Google.Protobuf` 3.34.1 and `Grpc.Core.Api` 2.76.0; the other three packages (`Grpc.Net.Client`, `Microsoft.Extensions.Logging.Abstractions`, `Polly`) come from the (now-deleted) `MxGateway.Client.csproj`, not the contracts csproj. The README points at the wrong source-of-truth file. See Driver.Galaxy-016 for the related version-skew issue.
|
||||
2. `libs/README.md` says the DLLs "are built against net10.0" — accurate, but the README should also pin the source-commit SHA from `mxaccessgw` that produced the build (currently no such reference). Without it, "May 2026" is the only locator and a future refresh has no fixed point to roll back to.
|
||||
3. The two `<Reference>` items in the csproj omit `<SpecificVersion>false</SpecificVersion>`. The vendored DLLs carry `AssemblyVersion 1.0.0.0`; MSBuild's default for `<Reference HintPath>` items is `SpecificVersion=true` only when the `Include` attribute contains version info, which it does not here, so this is benign — but spelling it out (`<SpecificVersion>false</SpecificVersion>`) would make a future refresh that bumps the AssemblyVersion robust without csproj edits.
|
||||
4. The csproj `<Reference Include="MxGateway.Client">` value relies on the bare assembly simple-name; an explicit `<Reference Include="MxGateway.Client, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null">` plus `<SpecificVersion>false</SpecificVersion>` would document the contract surface inside the csproj where a reviewer reads it.
|
||||
|
||||
**Recommendation:** (a) Update `libs/README.md` to (i) point at `MxGateway.Client.csproj` for the `Grpc.Net.Client`/`Microsoft.Extensions.Logging.Abstractions`/`Polly` version source, (ii) record the mxaccessgw commit SHA the vendored binaries were built from, and (iii) record SHA-256 hashes (see Driver.Galaxy-015). (b) Add `<SpecificVersion>false</SpecificVersion>` to both `<Reference>` items in the csproj to make the intent explicit and refresh-robust.
|
||||
|
||||
**Resolution:** Resolved 2026-05-23 — most of (a) was addressed alongside
|
||||
Driver.Galaxy-015 + -016: `libs/README.md` rewritten to (i) point at the
|
||||
sibling Server/Worker csproj as the live version source-of-truth (the
|
||||
`MxGateway.Client.csproj` cited in the recommendation no longer exists —
|
||||
the deleted-csproj reference would not have been actionable for a
|
||||
future reader), (ii) record source commit
|
||||
`dd7ca1634e2d2b8a866c81f0009bf87ee9427750`, and (iii) record SHA-256
|
||||
checksums of both vendored DLLs. (b) `<SpecificVersion>false</SpecificVersion>`
|
||||
was intentionally NOT added — the vendored DLL's AssemblyVersion is
|
||||
`1.0.0.0` and MSBuild's default for `<Reference HintPath>` Include="bare-name"
|
||||
items is already `SpecificVersion=false`, so the spelling-it-out
|
||||
recommendation would be cosmetic without changing behaviour. If the
|
||||
vendored DLLs are ever refreshed against a build with a different
|
||||
`AssemblyVersion` the explicit attribute could be added then; for now
|
||||
the existing csproj works correctly.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -92,7 +92,7 @@ dead-lettered. Until then, document explicitly that this writer never produces
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `WonderwareHistorianClient.cs:207`, `WonderwareHistorianClient.cs:132-150` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_totalQueries` is mutated with `Interlocked.Increment` in `Invoke`, but
|
||||
read inside `GetHealthSnapshot` under `_healthLock`, and every other counter
|
||||
@@ -106,7 +106,7 @@ and the counters are advisory, but the mixed model is a latent hazard.
|
||||
`_healthLock` block (a new `RecordQuery()` helper, or fold it into `RecordSuccess`/
|
||||
`RecordFailure`) so all six health fields share a single lock.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — replaced the mixed `Interlocked.Increment(ref _totalQueries)` + `_healthLock`-protected outcome counters with a single `RecordOutcome(bool success, string? error)` helper that increments `_totalQueries` and exactly one of `_totalSuccesses` / `_totalFailures` under one `_healthLock` acquisition; `GetHealthSnapshot` documents the invariant that `TotalSuccesses + TotalFailures == TotalQueries` at every observed snapshot. Added the regression test `GetHealthSnapshot_ConcurrentCallsAndReads_CountersAreInternallyConsistent` that runs a polling reader concurrently with 50 calls and asserts the invariant never breaks (fails red against the previous code, passes green now).
|
||||
|
||||
### Driver.Historian.Wonderware.Client-004
|
||||
|
||||
@@ -115,7 +115,7 @@ and the counters are advisory, but the mixed model is a latent hazard.
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `WonderwareHistorianClient.cs:203-267` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** A sidecar-reported failure is recorded in two non-atomic steps under
|
||||
separate lock acquisitions: `Invoke` calls `RecordSuccess()` (line 211) and then the
|
||||
@@ -132,7 +132,7 @@ sidecar-level `Success` flag has been checked, or pass the reply success/error i
|
||||
single `RecordOutcome(bool transportOk, bool sidecarOk, string? error)` that updates all
|
||||
counters under one lock acquisition.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — eliminated the `RecordSuccess` → `ReclassifySuccessAsFailure` undo dance. `InvokeAsync` now takes a `Func<TReply, (bool ok, string? error)>` evaluator, evaluates it once when the transport reply lands, and calls `RecordOutcome(bool success, string? error)` exactly once per call under a single `_healthLock` acquisition. A sidecar-reported failure is now classified as a failure on its first and only counter update — no transient "success then undo" state is observable. The read-side `InvokeAndClassifyAsync` wrapper preserves the prior `InvalidOperationException` throw on sidecar failure. Added regression test `GetHealthSnapshot_SidecarFailure_NeverInflatesSuccessCounter` pinning `TotalSuccesses=0`/`TotalFailures=1` after a sidecar-error call.
|
||||
|
||||
### Driver.Historian.Wonderware.Client-005
|
||||
|
||||
@@ -167,7 +167,7 @@ the reader.
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `Internal/PipeChannel.cs:96-107`, `WonderwareHistorianClientOptions.cs:11-12` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `PipeChannel.InvokeAsync` retries exactly once on transport failure and
|
||||
otherwise propagates. The options expose `ReconnectInitialBackoff` and
|
||||
@@ -182,7 +182,7 @@ or the options are dead config that misleads operators.
|
||||
path, or remove the two unused option fields and their XML docs and state plainly that
|
||||
retry/backoff is owned by the caller (the alarm drain worker / history router).
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — removed the dead `ReconnectInitialBackoff`/`ReconnectMaxBackoff` fields (and their `Effective*` accessors) from `WonderwareHistorianClientOptions` and added a `<remarks>` block stating that retry/backoff is owned by the caller (the alarm drain worker and the read-side history router) and that the channel itself performs exactly one in-place reconnect with no delay. Confirmed no consumer referenced the removed fields (only `code-reviews/` references remain). Solution-level build clean — Server picks up the new options shape without change.
|
||||
|
||||
### Driver.Historian.Wonderware.Client-007
|
||||
|
||||
@@ -218,7 +218,7 @@ deserializing.
|
||||
| Severity | Low |
|
||||
| Category | Security |
|
||||
| Location | `ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj:29-32` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The csproj suppresses two NuGet audit advisories
|
||||
(`GHSA-37gx-xxp4-5rgx`, `GHSA-w3x6-4m5h-cxqf`) for the `MessagePack` 2.5.187 dependency
|
||||
@@ -232,7 +232,7 @@ advisory title, why it does not apply to this module usage, and a revisit trigge
|
||||
follow-up to upgrade `MessagePack` once a patched version is available so the suppressions
|
||||
can be dropped.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — the suppression block in the csproj (already added under finding 007) records each advisory title (GHSA-37gx-xxp4-5rgx unsafe-dynamic-codegen, GHSA-w3x6-4m5h-cxqf typeless-resolver gadget chain), why neither applies to this module (default `StandardResolver` only, no `TypelessContractlessStandardResolver` / `DynamicUnion` / `DynamicGenericResolver`, plus the 64 KiB per-sample ValueBytes cap in `DeserializeSampleValue` from finding 007), and the revisit trigger ("Revisit once MessagePack 3.x is available and drop these suppressions at that time"). All three pieces the recommendation asked for are present; the single comment block above both `NuGetAuditSuppress` entries was confirmed to satisfy the audit-trail gap.
|
||||
|
||||
### Driver.Historian.Wonderware.Client-009
|
||||
|
||||
@@ -272,7 +272,7 @@ silent `[Key]` drift between the two duplicated contract sets is caught at build
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `WonderwareHistorianClient.cs:355-361`, `WonderwareHistorianClient.cs:132-150` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Two doc/behaviour mismatches.
|
||||
(1) The `Dispose()` XML comment asserts the underlying channel async cleanup is
|
||||
@@ -291,4 +291,4 @@ node concept. The collapse is reasonable but undocumented.
|
||||
short remark on `GetHealthSnapshot` explaining that the single-channel client maps both
|
||||
connection flags to one transport and does not track per-node health.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-23 — (1) reworded the `Dispose()` XML comment to drop the "non-blocking" claim and instead state that the bridge is **deadlock-safe** because the cleanup never awaits a captured `SynchronizationContext` nor takes any lock the caller could hold, while acknowledging that `NamedPipeClientStream` teardown can block briefly on OS handle release. (2) Added a full `<summary>` + `<remarks>` block to `GetHealthSnapshot` explaining the single-channel collapse — both `ProcessConnectionOpen` and `EventConnectionOpen` report the same channel state, and `ActiveProcessNode`/`ActiveEventNode`/`Nodes` are intentionally null/empty because the client has no per-node telemetry. The remarks also pin the finding-003/004 invariant `TotalSuccesses + TotalFailures == TotalQueries`.
|
||||
|
||||
+55
-44
@@ -12,26 +12,26 @@ Each module's `findings.md` is the source of truth; this file is generated from
|
||||
|---|---|---|---|---|---|---|
|
||||
| [Admin](Admin/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 13 |
|
||||
| [Analyzers](Analyzers/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 7 |
|
||||
| [Client.CLI](Client.CLI/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 8 | 10 |
|
||||
| [Client.Shared](Client.Shared/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 5 | 11 |
|
||||
| [Client.UI](Client.UI/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 6 | 11 |
|
||||
| [Client.CLI](Client.CLI/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 10 |
|
||||
| [Client.Shared](Client.Shared/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 11 |
|
||||
| [Client.UI](Client.UI/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 11 |
|
||||
| [Configuration](Configuration/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 11 |
|
||||
| [Core](Core/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 12 |
|
||||
| [Core.Abstractions](Core.Abstractions/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 8 |
|
||||
| [Core.AlarmHistorian](Core.AlarmHistorian/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 11 |
|
||||
| [Core.ScriptedAlarms](Core.ScriptedAlarms/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 12 |
|
||||
| [Core.Scripting](Core.Scripting/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 11 |
|
||||
| [Core.ScriptedAlarms](Core.ScriptedAlarms/findings.md) | Claude Code | 2026-05-23 | `a9be809` | Reviewed | 0 | 13 |
|
||||
| [Core.Scripting](Core.Scripting/findings.md) | Claude Code | 2026-05-23 | `a9be809` | Reviewed | 0 | 16 |
|
||||
| [Core.VirtualTags](Core.VirtualTags/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 13 |
|
||||
| [Driver.AbCip](Driver.AbCip/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 15 |
|
||||
| [Driver.AbCip.Cli](Driver.AbCip.Cli/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 8 |
|
||||
| [Driver.AbLegacy](Driver.AbLegacy/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 13 |
|
||||
| [Driver.AbLegacy.Cli](Driver.AbLegacy.Cli/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 7 |
|
||||
| [Driver.Cli.Common](Driver.Cli.Common/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 2 | 6 |
|
||||
| [Driver.Cli.Common](Driver.Cli.Common/findings.md) | Claude Code | 2026-05-23 | `a9be809` | Reviewed | 0 | 8 |
|
||||
| [Driver.FOCAS](Driver.FOCAS/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 12 |
|
||||
| [Driver.FOCAS.Cli](Driver.FOCAS.Cli/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 5 | 5 |
|
||||
| [Driver.Galaxy](Driver.Galaxy/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 14 |
|
||||
| [Driver.FOCAS.Cli](Driver.FOCAS.Cli/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 5 |
|
||||
| [Driver.Galaxy](Driver.Galaxy/findings.md) | Claude Code | 2026-05-23 | `a9be809` | Reviewed | 0 | 18 |
|
||||
| [Driver.Historian.Wonderware](Driver.Historian.Wonderware/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 12 |
|
||||
| [Driver.Historian.Wonderware.Client](Driver.Historian.Wonderware.Client/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 5 | 10 |
|
||||
| [Driver.Historian.Wonderware.Client](Driver.Historian.Wonderware.Client/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 10 |
|
||||
| [Driver.Modbus](Driver.Modbus/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 12 |
|
||||
| [Driver.Modbus.Addressing](Driver.Modbus.Addressing/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 9 |
|
||||
| [Driver.Modbus.Cli](Driver.Modbus.Cli/findings.md) | Claude Code | 2026-05-22 | `76d35d1` | Reviewed | 0 | 8 |
|
||||
@@ -46,39 +46,7 @@ Each module's `findings.md` is the source of truth; this file is generated from
|
||||
|
||||
Findings with status `Open` or `In Progress`, ordered by severity.
|
||||
|
||||
| ID | Severity | Category | Location | Description |
|
||||
|---|---|---|---|---|
|
||||
| Client.CLI-002 | Low | Correctness & logic bugs | `Commands/SubscribeCommand.cs:129-137` | The summary computes `neverWentBad` as every target whose node-id key is absent from the `everBad` dictionary. A node that received no update at all is also absent from `everBad`, so it is counted in `neverWentBad` and printed under the he… |
|
||||
| Client.CLI-003 | Low | Correctness & logic bugs | `Commands/BrowseCommand.cs:29-30`, `Commands/SubscribeCommand.cs:20-27`, `Commands/AlarmsCommand.cs:28-29`, `Commands/HistoryReadCommand.cs:42-43` | Numeric command options accept any value with no range validation. `--depth`, `--interval`, `--max-depth`, `--max`, and the history `--interval` can all be supplied as `0` or a negative number. A negative `--depth`/`--max-depth` silently d… |
|
||||
| Client.CLI-004 | Low | OtOpcUa conventions | `Commands/SubscribeCommand.cs:13-37` | `SubscribeCommand` is the only command in the module whose constructor and all `[CommandOption]` properties have no XML doc comments. Every other command (`ConnectCommand`, `ReadCommand`, `WriteCommand`, `BrowseCommand`, `AlarmsCommand`, `… |
|
||||
| Client.CLI-006 | Low | Error handling & resilience | `Commands/HistoryReadCommand.cs:73`, `Commands/HistoryReadCommand.cs:76`, `Helpers/NodeIdParser.cs:39` | Operator input-format errors surface as raw .NET exceptions rather than clean CLI errors. An unparseable start/end value throws `FormatException` straight out of `DateTime.Parse`; an invalid node id throws `FormatException`/`ArgumentExcept… |
|
||||
| Client.CLI-007 | Low | Performance & resource management | `CommandBase.cs:112-123` | `ConfigureLogging` builds a new Serilog `LoggerConfiguration`, creates a logger, and assigns it to the static `Log.Logger` without disposing the previously assigned logger. For a single CLI invocation this leaks at most one logger and the… |
|
||||
| Client.CLI-008 | Low | Documentation & comments | `docs/Client.CLI.md:158-217` | `docs/Client.CLI.md` is stale relative to the code at this commit. (1) The `subscribe` command section documents only `-n` and `-i`, but the code (`SubscribeCommand`) also exposes `-r/--recursive`, `--max-depth`, `-q/--quiet`, `--duration`… |
|
||||
| Client.CLI-009 | Low | Code organization & conventions | `Commands/SubscribeCommand.cs:66-165`, `Commands/AlarmsCommand.cs:52-91` | Both long-running commands attach an event handler (`service.DataChanged += ...`, `service.AlarmEvent += ...`) with a lambda and never detach it. Because the handler closes over `console`, the captured console and the closure remain refere… |
|
||||
| Client.CLI-010 | Low | Testing coverage | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.CLI.Tests/SubscribeCommandTests.cs` | The new `SubscribeCommand` capabilities are largely untested. The four `SubscribeCommandTests` cover only single-node subscribe, unsubscribe-on-cancel, disconnect-in-finally, and the subscription message. There is no test for the `--recurs… |
|
||||
| Client.Shared-003 | Low | Correctness & logic bugs | `Adapters/DefaultSessionAdapter.cs:76`, `Adapters/DefaultSessionAdapter.cs:273` | `WriteValueAsync` returns `response.Results[0]` and `CallMethodAsync` reads `result.Results[0]` without first checking the `Results` collection is non-empty. A malformed or service-level-faulted response (empty `Results` alongside a servic… |
|
||||
| Client.Shared-004 | Low | OtOpcUa conventions | `Adapters/DefaultSessionAdapter.cs:228`, `Adapters/DefaultSessionAdapter.cs:121`, `Adapters/DefaultSessionAdapter.cs:172` | `CloseAsync`, `HistoryReadRawAsync`, and `HistoryReadAggregateAsync` are declared `async Task` but call the synchronous `Session.Close()` / `Session.HistoryRead(...)` APIs and contain no `await`. The history methods run a blocking synchron… |
|
||||
| Client.Shared-009 | Low | Error handling & resilience / Documentation & comments | `OpcUaClientService.cs:302-322` | `AcknowledgeAlarmAsync` is typed `Task<StatusCode>` and its XML doc implies the returned code reports the ack outcome, but the method unconditionally `return StatusCodes.Good`. The actual failure path is `DefaultSessionAdapter.CallMethodAs… |
|
||||
| Client.Shared-010 | Low | Performance & resource management | `Models/ConnectionSettings.cs:48`, `OpcUaClientService.cs:408-417` | `ConnectionSettings.CertificateStorePath` is initialized to `ClientStoragePaths.GetPkiPath()` as a property initializer, so every `ConnectionSettings` instantiation runs `Environment.GetFolderPath` + `Path.Combine` and, on the first call p… |
|
||||
| Client.Shared-011 | Low | Testing coverage | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.Shared.Tests/OpcUaClientServiceTests.cs` | The test suite is solid for the happy paths, connection lifecycle, and single-failover behavior. Gaps relative to the findings above: (a) no test exercises concurrent `SubscribeAsync`/failover to expose the `_activeDataSubscriptions` race… |
|
||||
| Client.UI-003 | Low | OtOpcUa conventions | `ZB.MOM.WW.OtOpcUa.Client.UI.csproj:20-21`, `Program.cs:14-20` | The csproj references `Serilog` and `Serilog.Sinks.Console`, and `docs/Client.UI.md` lists Serilog as the logging technology, but no source file in the module uses Serilog. `Program.BuildAvaloniaApp()` uses Avalonia's `LogToTrace()` and th… |
|
||||
| Client.UI-004 | Low | OtOpcUa conventions | `Views/MainWindow.axaml.cs:125-138` | `OnBrowseCertPathClicked` uses `OpenFolderDialog`, which is obsolete in Avalonia 11.x (the version pinned in the csproj). The supported replacement is the `StorageProvider` API (`StorageProvider.OpenFolderPickerAsync`). Using the obsolete… |
|
||||
| Client.UI-006 | Low | Error handling & resilience | `ViewModels/MainWindowViewModel.cs:244-252`, `ViewModels/AlarmsViewModel.cs:88-112`, `ViewModels/SubscriptionsViewModel.cs:79-94` | Many catch blocks swallow exceptions silently with an empty body and only a comment (`// Redundancy info not available`, `// Subscribe failed`, `// Subscription failed; no item added`, and others). When a subscribe, alarm-subscribe, or red… |
|
||||
| Client.UI-009 | Low | Design-document adherence | `ViewModels/HistoryViewModel.cs:44-54` | `HistoryViewModel.AggregateTypes` exposes eight entries: `null` (Raw) plus Average, Minimum, Maximum, Count, Start, End, and `StandardDeviation`. `docs/Client.UI.md` ("Query Options" table) lists only "Raw (default), Average, Minimum, Maxi… |
|
||||
| Client.UI-010 | Low | Code organization & conventions | `Controls/DateTimeRangePicker.axaml.cs:33-37`, `Controls/DateTimeRangePicker.axaml.cs:70-80` | `DateTimeRangePicker` declares `MinDateTimeProperty` / `MaxDateTimeProperty` styled properties with public CLR accessors, but neither is read anywhere in the control. `TryParseDateTime`, `OnStartLostFocus`, and `OnEndLostFocus` never clamp… |
|
||||
| Client.UI-011 | Low | Documentation & comments | `Views/MainWindow.axaml:81`, `Services/JsonSettingsService.cs:11-15` | The certificate-store-path `TextBox` watermark reads `(default: AppData/LmxOpcUaClient/pki)`, referencing the legacy pre-task-#208 folder name. Per `CLAUDE.md` / `docs/Client.UI.md` the canonical path is now `{LocalAppData}/OtOpcUaClient/`… |
|
||||
| Driver.Cli.Common-004 | Low | Error handling & resilience | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:68-70` | `FormatTable` calls `rows.Max(r => r.Tag.Length)` (and the same for the value and status columns) without guarding against empty input. When `tagNames` and `snapshots` are both empty (equal length, so the mismatch check at line 56 passes),… |
|
||||
| Driver.Cli.Common-006 | Low | Documentation & comments | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:71`, `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/DriverCommandBase.cs:9` | Two minor doc inaccuracies. (1) The comment at `SnapshotFormatter.cs:71` states the "source-time column is fixed-width (ISO-8601 to ms) so no max-measurement needed" — true only when every snapshot has a non-null `SourceTimestampUtc`. `For… |
|
||||
| Driver.FOCAS.Cli-001 | Low | Error handling & resilience | `Commands/WriteCommand.cs:58-68` | `WriteCommand.ParseValue` parses the numeric `--value` types (`Byte`/`Int16`/`Int32`/`Float32`/`Float64`) with `sbyte.Parse` / `short.Parse` / etc. These throw raw `FormatException` or `OverflowException` for malformed or out-of-range inpu… |
|
||||
| Driver.FOCAS.Cli-002 | Low | Concurrency & thread safety | `Commands/SubscribeCommand.cs:45-51` | The `subscribe` command attaches an `OnDataChange` handler that calls the synchronous `console.Output.WriteLine`. `OnDataChange` is raised from the driver's `PollGroupEngine` tick thread, while the command's main flow writes the "Subscribe… |
|
||||
| Driver.FOCAS.Cli-003 | Low | Error handling & resilience | `FocasCommandBase.cs:19` (`CncPort`), `FocasCommandBase.cs:27` (`TimeoutMs`), `Commands/SubscribeCommand.cs:23` (`IntervalMs`) | The numeric command options `--cnc-port`, `--timeout-ms`, and `--interval-ms` are accepted without range validation. A zero or negative `--cnc-port` produces an invalid `focas://host:<n>` string; `--timeout-ms 0` yields a zero `TimeSpan` o… |
|
||||
| Driver.FOCAS.Cli-004 | Low | Performance & resource management | `Commands/ProbeCommand.cs:37,54`; `Commands/ReadCommand.cs:37,46`; `Commands/WriteCommand.cs:45,54`; `Commands/SubscribeCommand.cs:39,73` | Every command declares `await using var driver = new FocasDriver(...)` |
|
||||
| Driver.FOCAS.Cli-005 | Low | Design-document adherence | `Commands/WriteCommand.cs:50`, `Commands/ProbeCommand.cs:50` (via `SnapshotFormatter.FormatStatus`) | `docs/Driver.FOCAS.Cli.md` documents `BadDeviceFailure` and `BadCommunicationError` as the key diagnostic signals an operator reads off `probe` / `write` output ("A `BadCommunicationError` means ... `BadDeviceFailure` after a successful co… |
|
||||
| Driver.Historian.Wonderware.Client-003 | Low | Concurrency & thread safety | `WonderwareHistorianClient.cs:207`, `WonderwareHistorianClient.cs:132-150` | `_totalQueries` is mutated with `Interlocked.Increment` in `Invoke`, but read inside `GetHealthSnapshot` under `_healthLock`, and every other counter (`_totalSuccesses`, `_totalFailures`, `_consecutiveFailures`) is mutated only under `_hea… |
|
||||
| Driver.Historian.Wonderware.Client-004 | Low | Concurrency & thread safety | `WonderwareHistorianClient.cs:203-267` | A sidecar-reported failure is recorded in two non-atomic steps under separate lock acquisitions: `Invoke` calls `RecordSuccess()` (line 211) and then the caller calls `ThrowIfFailed` which calls `ReclassifySuccessAsFailure()` (line 256), d… |
|
||||
| Driver.Historian.Wonderware.Client-006 | Low | Error handling & resilience | `Internal/PipeChannel.cs:96-107`, `WonderwareHistorianClientOptions.cs:11-12` | `PipeChannel.InvokeAsync` retries exactly once on transport failure and otherwise propagates. The options expose `ReconnectInitialBackoff` and `ReconnectMaxBackoff` and `WonderwareHistorianClientOptions` documents them as exponential backo… |
|
||||
| Driver.Historian.Wonderware.Client-008 | Low | Security | `ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj:29-32` | The csproj suppresses two NuGet audit advisories (`GHSA-37gx-xxp4-5rgx`, `GHSA-w3x6-4m5h-cxqf`) for the `MessagePack` 2.5.187 dependency with no inline comment recording why the suppression is safe, who reviewed it, or when it should be re… |
|
||||
| Driver.Historian.Wonderware.Client-010 | Low | Documentation & comments | `WonderwareHistorianClient.cs:355-361`, `WonderwareHistorianClient.cs:132-150` | Two doc/behaviour mismatches. (1) The `Dispose()` XML comment asserts the underlying channel async cleanup is non-blocking so the `GetAwaiter()/GetResult()` bridge is safe. `PipeChannel.DisposeAsync` calls `ResetTransport()`, which invokes… |
|
||||
_No pending findings._
|
||||
|
||||
## Closed findings
|
||||
|
||||
@@ -107,6 +75,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Core.AlarmHistorian-006 | High | Resolved | Error handling & resilience | `src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/SqliteStoreAndForwardSink.cs:103,135-216` |
|
||||
| Core.ScriptedAlarms-001 | High | Resolved | Concurrency & thread safety | `ScriptedAlarmEngine.cs:175`, `ScriptedAlarmEngine.cs:178`, `ScriptedAlarmEngine.cs:73`, `ScriptedAlarmEngine.cs:368` |
|
||||
| Core.Scripting-002 | High | Resolved | Security | `ForbiddenTypeAnalyzer.cs:70` |
|
||||
| Core.Scripting-012 | High | Resolved | Security | `ForbiddenTypeAnalyzer.cs:60-76`, `ScriptSandbox.cs:96-126` |
|
||||
| Core.VirtualTags-001 | High | Resolved | Correctness & logic bugs | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:306` |
|
||||
| Driver.AbCip-001 | High | Resolved | Correctness & logic bugs | `AbCipDriver.cs:111`, `AbCipDriver.cs:163-167` |
|
||||
| Driver.AbCip-002 | High | Resolved | Correctness & logic bugs | `AbCipStatusMapper.cs:65-78` |
|
||||
@@ -115,6 +84,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Driver.AbLegacy-001 | High | Resolved | Correctness & logic bugs | `AbLegacyAddress.cs:54`, `AbLegacyDriver.cs:368-374` |
|
||||
| Driver.AbLegacy-006 | High | Resolved | Concurrency & thread safety | `AbLegacyDriver.cs:107-158`, `AbLegacyDriver.cs:162-234`, `LibplctagLegacyTagRuntime.cs` |
|
||||
| Driver.Cli.Common-001 | High | Resolved | Correctness & logic bugs | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:106-119` |
|
||||
| Driver.Cli.Common-007 | High | Resolved | Correctness & logic bugs | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:129` |
|
||||
| Driver.FOCAS-001 | High | Resolved | Correctness & logic bugs | `FocasDriverFactoryExtensions.cs:54-86`, `FocasDriverFactoryExtensions.cs:132-140` |
|
||||
| Driver.FOCAS-002 | High | Resolved | Correctness & logic bugs | `WireFocasClient.cs:164-179`, `FocasDriver.cs:513`, `FocasDriver.cs:593` |
|
||||
| Driver.Galaxy-002 | High | Resolved | Correctness & logic bugs | `Browse/DataTypeMap.cs:13`, `Runtime/MxValueDecoder.cs:9` |
|
||||
@@ -181,6 +151,9 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Core.Scripting-004 | Medium | Resolved | Correctness & logic bugs | `DependencyExtractor.cs:73` |
|
||||
| Core.Scripting-007 | Medium | Resolved | Error handling & resilience | `TimedScriptEvaluator.cs:60` |
|
||||
| Core.Scripting-010 | Medium | Resolved | Testing coverage | `tests/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/ScriptSandboxTests.cs:54` |
|
||||
| Core.Scripting-013 | Medium | Resolved | Security | `ScriptEvaluator.cs:202-225` (`BuildWrapperSource`) |
|
||||
| Core.Scripting-014 | Medium | Resolved | Concurrency & thread safety | `CompiledScriptCache.cs:91-103` (`Clear`) |
|
||||
| Core.Scripting-016 | Medium | Resolved | Performance & resource management | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:74-117`, `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/ScriptedAlarmEngine.cs:139-182` |
|
||||
| Core.VirtualTags-002 | Medium | Resolved | Correctness & logic bugs | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:237` |
|
||||
| Core.VirtualTags-003 | Medium | Resolved | Correctness & logic bugs | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:117-120` |
|
||||
| Core.VirtualTags-005 | Medium | Resolved | Concurrency & thread safety | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagSource.cs:50-64` |
|
||||
@@ -218,6 +191,7 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Driver.Galaxy-009 | Medium | Resolved | Error handling & resilience | `GalaxyDriver.cs:354-371` |
|
||||
| Driver.Galaxy-011 | Medium | Resolved | Performance & resource management | `GalaxyDriver.cs:411` |
|
||||
| Driver.Galaxy-014 | Medium | Resolved | Testing coverage | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy` (module-wide) |
|
||||
| Driver.Galaxy-016 | Medium | Resolved | Performance & resource management | `ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj:43-47`, `libs/README.md:32-37` |
|
||||
| Driver.Historian.Wonderware-002 | Medium | Resolved | Correctness and logic bugs | `Ipc/HistorianFrameHandler.cs:162`, `:181` |
|
||||
| Driver.Historian.Wonderware-003 | Medium | Resolved | Correctness and logic bugs | `Backend/HistorianDataSource.cs:320-323`, `:457-460` |
|
||||
| Driver.Historian.Wonderware-006 | Medium | Resolved | Error handling and resilience | `Ipc/PipeServer.cs:120-128` |
|
||||
@@ -273,6 +247,25 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Analyzers-004 | Low | Resolved | Performance & resource management | `src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs:95-112` |
|
||||
| Analyzers-005 | Low | Resolved | Design-document adherence | `src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs:33-43` |
|
||||
| Analyzers-007 | Low | Resolved | Documentation & comments | `src/Tooling/ZB.MOM.WW.OtOpcUa.Analyzers/UnwrappedCapabilityCallAnalyzer.cs:21-26` |
|
||||
| Client.CLI-002 | Low | Resolved | Correctness & logic bugs | `Commands/SubscribeCommand.cs:129-137` |
|
||||
| Client.CLI-003 | Low | Resolved | Correctness & logic bugs | `Commands/BrowseCommand.cs:29-30`, `Commands/SubscribeCommand.cs:20-27`, `Commands/AlarmsCommand.cs:28-29`, `Commands/HistoryReadCommand.cs:42-43` |
|
||||
| Client.CLI-004 | Low | Resolved | OtOpcUa conventions | `Commands/SubscribeCommand.cs:13-37` |
|
||||
| Client.CLI-006 | Low | Resolved | Error handling & resilience | `Commands/HistoryReadCommand.cs:73`, `Commands/HistoryReadCommand.cs:76`, `Helpers/NodeIdParser.cs:39` |
|
||||
| Client.CLI-007 | Low | Resolved | Performance & resource management | `CommandBase.cs:112-123` |
|
||||
| Client.CLI-008 | Low | Resolved | Documentation & comments | `docs/Client.CLI.md:158-217` |
|
||||
| Client.CLI-009 | Low | Resolved | Code organization & conventions | `Commands/SubscribeCommand.cs:66-165`, `Commands/AlarmsCommand.cs:52-91` |
|
||||
| Client.CLI-010 | Low | Resolved | Testing coverage | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.CLI.Tests/SubscribeCommandTests.cs` |
|
||||
| Client.Shared-003 | Low | Resolved | Correctness & logic bugs | `Adapters/DefaultSessionAdapter.cs:76`, `Adapters/DefaultSessionAdapter.cs:273` |
|
||||
| Client.Shared-004 | Low | Resolved | OtOpcUa conventions | `Adapters/DefaultSessionAdapter.cs:228`, `Adapters/DefaultSessionAdapter.cs:121`, `Adapters/DefaultSessionAdapter.cs:172` |
|
||||
| Client.Shared-009 | Low | Resolved | Error handling & resilience / Documentation & comments | `OpcUaClientService.cs:302-322` |
|
||||
| Client.Shared-010 | Low | Resolved | Performance & resource management | `Models/ConnectionSettings.cs:48`, `OpcUaClientService.cs:408-417` |
|
||||
| Client.Shared-011 | Low | Resolved | Testing coverage | `tests/Client/ZB.MOM.WW.OtOpcUa.Client.Shared.Tests/OpcUaClientServiceTests.cs` |
|
||||
| Client.UI-003 | Low | Resolved | OtOpcUa conventions | `ZB.MOM.WW.OtOpcUa.Client.UI.csproj:20-21`, `Program.cs:14-20` |
|
||||
| Client.UI-004 | Low | Resolved | OtOpcUa conventions | `Views/MainWindow.axaml.cs:125-138` |
|
||||
| Client.UI-006 | Low | Resolved | Error handling & resilience | `ViewModels/MainWindowViewModel.cs:244-252`, `ViewModels/AlarmsViewModel.cs:88-112`, `ViewModels/SubscriptionsViewModel.cs:79-94` |
|
||||
| Client.UI-009 | Low | Resolved | Design-document adherence | `ViewModels/HistoryViewModel.cs:44-54` |
|
||||
| Client.UI-010 | Low | Resolved | Code organization & conventions | `Controls/DateTimeRangePicker.axaml.cs:33-37`, `Controls/DateTimeRangePicker.axaml.cs:70-80` |
|
||||
| Client.UI-011 | Low | Resolved | Documentation & comments | `Views/MainWindow.axaml:81`, `Services/JsonSettingsService.cs:11-15` |
|
||||
| Configuration-004 | Low | Resolved | OtOpcUa conventions | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/NodePermissions.cs:8`, `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/OtOpcUaConfigDbContext.cs:417` |
|
||||
| Configuration-005 | Low | Resolved | Concurrency & thread safety | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/LocalCache/LiteDbConfigCache.cs:50` |
|
||||
| Configuration-007 | Low | Resolved | Error handling & resilience | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Apply/GenerationApplier.cs:44` |
|
||||
@@ -294,14 +287,16 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Core.ScriptedAlarms-003 | Low | Resolved | Documentation & comments | `ScriptedAlarmEngine.cs:343`, `docs/ScriptedAlarms.md:107` |
|
||||
| Core.ScriptedAlarms-006 | Low | Resolved | Concurrency & thread safety | `ScriptedAlarmEngine.cs:232`, `ScriptedAlarmEngine.cs:369` |
|
||||
| Core.ScriptedAlarms-008 | Low | Resolved | Performance & resource management | `Part9StateMachine.cs:261-268` |
|
||||
| Core.ScriptedAlarms-009 | Low | Won't Fix | Performance & resource management | `ScriptedAlarmEngine.cs:309-315`, `ScriptedAlarmEngine.cs:271` |
|
||||
| Core.ScriptedAlarms-009 | Low | Resolved | Performance & resource management | `ScriptedAlarmEngine.cs:309-315`, `ScriptedAlarmEngine.cs:271` |
|
||||
| Core.ScriptedAlarms-010 | Low | Resolved | Design-document adherence | `ScriptedAlarmEngine.cs:325-336`, `AlarmPredicateContext.cs:33-40`, `MessageTemplate.cs:47` |
|
||||
| Core.ScriptedAlarms-011 | Low | Resolved | Code organization & conventions | `Part9StateMachine.cs:275` |
|
||||
| Core.ScriptedAlarms-013 | Low | Resolved | Documentation & comments | `ScriptedAlarmEngine.cs:66-81` |
|
||||
| Core.Scripting-005 | Low | Resolved | Correctness & logic bugs | `DependencyExtractor.cs:97` |
|
||||
| Core.Scripting-006 | Low | Resolved | Concurrency & thread safety | `CompiledScriptCache.cs:55` |
|
||||
| Core.Scripting-008 | Low | Won't Fix | Performance & resource management | `CompiledScriptCache.cs:34`, `ScriptEvaluator.cs:34` |
|
||||
| Core.Scripting-008 | Low | Resolved | Performance & resource management | `CompiledScriptCache.cs:34`, `ScriptEvaluator.cs:34` |
|
||||
| Core.Scripting-009 | Low | Resolved | Design-document adherence | `ForbiddenTypeAnalyzer.cs:45` |
|
||||
| Core.Scripting-011 | Low | Resolved | Testing coverage | `tests/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/` |
|
||||
| Core.Scripting-015 | Low | Resolved | Correctness & logic bugs | `ScriptEvaluator.cs:234-270` (`ToCSharpTypeName`) |
|
||||
| Core.VirtualTags-004 | Low | Resolved | Correctness & logic bugs | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:349` |
|
||||
| Core.VirtualTags-006 | Low | Resolved | Concurrency & thread safety | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagEngine.cs:177-182`, `:395-401` |
|
||||
| Core.VirtualTags-007 | Low | Resolved | Error handling & resilience | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/TimerTriggerScheduler.cs:58` |
|
||||
@@ -329,15 +324,25 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Driver.AbLegacy.Cli-005 | Low | Resolved | Design-document adherence | `Commands/SubscribeCommand.cs:23-25`, `docs/Driver.AbLegacy.Cli.md:94-96` |
|
||||
| Driver.AbLegacy.Cli-006 | Low | Resolved | Code organization & conventions | `Commands/ProbeCommand.cs:20-22` |
|
||||
| Driver.AbLegacy.Cli-007 | Low | Resolved | Testing coverage | `tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Cli.Tests/WriteCommandParseValueTests.cs` |
|
||||
| Driver.Cli.Common-004 | Low | Resolved | Error handling & resilience | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:68-70` |
|
||||
| Driver.Cli.Common-006 | Low | Resolved | Documentation & comments | `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/SnapshotFormatter.cs:71`, `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common/DriverCommandBase.cs:9` |
|
||||
| Driver.Cli.Common-008 | Low | Resolved | Testing coverage | `tests/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.Cli.Common.Tests/SnapshotFormatterTests.cs:50-64` |
|
||||
| Driver.FOCAS-007 | Low | Resolved | Error handling & resilience | `FocasDriver.cs:140-148`, `FocasDriver.cs:478-484`, `FocasDriver.cs:529-533`, `FocasAlarmProjection.cs:61-63` |
|
||||
| Driver.FOCAS-008 | Low | Resolved | Performance & resource management | `FocasDriver.cs:201`, `FocasDriver.cs:253` |
|
||||
| Driver.FOCAS-009 | Low | Resolved | Design-document adherence | `FocasDriverOptions.cs:110-115`, `FocasDriver.cs:468-486`, `FocasDriverFactoryExtensions.cs:75-80` |
|
||||
| Driver.FOCAS-010 | Low | Resolved | Code organization & conventions | `IFocasClient.cs:210-227` (`FocasOpMode`), `FocasConstants.cs:42-78` (`FocasOperationMode`) |
|
||||
| Driver.FOCAS-011 | Low | Resolved | Code organization & conventions | `IFocasClient.cs:275-287` (`FocasAlarmType`), `FocasAlarmProjection.cs:149-175` |
|
||||
| Driver.FOCAS.Cli-001 | Low | Resolved | Error handling & resilience | `Commands/WriteCommand.cs:58-68` |
|
||||
| Driver.FOCAS.Cli-002 | Low | Resolved | Concurrency & thread safety | `Commands/SubscribeCommand.cs:45-51` |
|
||||
| Driver.FOCAS.Cli-003 | Low | Resolved | Error handling & resilience | `FocasCommandBase.cs:19` (`CncPort`), `FocasCommandBase.cs:27` (`TimeoutMs`), `Commands/SubscribeCommand.cs:23` (`IntervalMs`) |
|
||||
| Driver.FOCAS.Cli-004 | Low | Resolved | Performance & resource management | `Commands/ProbeCommand.cs:37,54`; `Commands/ReadCommand.cs:37,46`; `Commands/WriteCommand.cs:45,54`; `Commands/SubscribeCommand.cs:39,73` |
|
||||
| Driver.FOCAS.Cli-005 | Low | Resolved | Design-document adherence | `Commands/WriteCommand.cs:50`, `Commands/ProbeCommand.cs:50` (via `SnapshotFormatter.FormatStatus`) |
|
||||
| Driver.Galaxy-005 | Low | Resolved | OtOpcUa conventions | `Runtime/EventPump.cs:81-88` |
|
||||
| Driver.Galaxy-010 | Low | Resolved | Security | `GalaxyDriver.cs:311-341` |
|
||||
| Driver.Galaxy-012 | Low | Resolved | Performance & resource management | `Runtime/SubscriptionRegistry.cs:65-67`, `GalaxyDriver.cs:538`, `GalaxyDriver.cs:675` |
|
||||
| Driver.Galaxy-013 | Low | Resolved | Design-document adherence | `GalaxyDriver.cs:14-27`, `GalaxyDriver.cs:374-382`, `Config/GalaxyDriverOptions.cs:84-86` |
|
||||
| Driver.Galaxy-017 | Low | Deferred | Design-document adherence | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/` (no source change), gateway proto contract |
|
||||
| Driver.Galaxy-018 | Low | Resolved | Documentation & comments | `libs/README.md:32-37`, `ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj:40-47` |
|
||||
| Driver.Historian.Wonderware-004 | Low | Resolved | Correctness and logic bugs | `Backend/SdkAlarmHistorianWriteBackend.cs:198-201` |
|
||||
| Driver.Historian.Wonderware-005 | Low | Resolved | Concurrency and thread safety | `Backend/HistorianDataSource.cs:124`, `:126-127` |
|
||||
| Driver.Historian.Wonderware-007 | Low | Resolved | Error handling and resilience | `Ipc/PipeServer.cs:70-75` |
|
||||
@@ -345,6 +350,11 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Driver.Historian.Wonderware-010 | Low | Resolved | Performance and resource management | `Backend/HistorianConfiguration.cs:32-36`, `Backend/HistorianDataSource.cs` (all read methods) |
|
||||
| Driver.Historian.Wonderware-011 | Low | Resolved | Design-document adherence | `Backend/HistorianDataSource.cs:9-12`, `Backend/IHistorianDataSource.cs:9-11`, `Backend/HistorianSample.cs:7-9`, `Backend/HistorianConfiguration.cs:7-9` |
|
||||
| Driver.Historian.Wonderware-012 | Low | Resolved | Testing coverage | `Backend/HistorianDataSource.cs`, `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests/` |
|
||||
| Driver.Historian.Wonderware.Client-003 | Low | Resolved | Concurrency & thread safety | `WonderwareHistorianClient.cs:207`, `WonderwareHistorianClient.cs:132-150` |
|
||||
| Driver.Historian.Wonderware.Client-004 | Low | Resolved | Concurrency & thread safety | `WonderwareHistorianClient.cs:203-267` |
|
||||
| Driver.Historian.Wonderware.Client-006 | Low | Resolved | Error handling & resilience | `Internal/PipeChannel.cs:96-107`, `WonderwareHistorianClientOptions.cs:11-12` |
|
||||
| Driver.Historian.Wonderware.Client-008 | Low | Resolved | Security | `ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj:29-32` |
|
||||
| Driver.Historian.Wonderware.Client-010 | Low | Resolved | Documentation & comments | `WonderwareHistorianClient.cs:355-361`, `WonderwareHistorianClient.cs:132-150` |
|
||||
| Driver.Modbus-003 | Low | Resolved | Concurrency & thread safety | `ModbusDriver.cs:59,188,241,259,266,726,745,759` |
|
||||
| Driver.Modbus-007 | Low | Resolved | Design-document adherence | `ModbusDriver.cs:1392`, `ModbusDriverOptions.cs:74-80` |
|
||||
| Driver.Modbus-008 | Low | Resolved | Documentation & comments | `ModbusDriver.cs:411-417,700-703,737-744` |
|
||||
@@ -390,3 +400,4 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Server-012 | Low | Resolved | Performance & resource management | `src/Server/ZB.MOM.WW.OtOpcUa.Server/Hosting/PeerHttpProbeLoop.cs:78-79` |
|
||||
| Server-014 | Low | Resolved | Code organization & conventions | `src/Server/ZB.MOM.WW.OtOpcUa.Server/SealedBootstrap.cs` |
|
||||
| Server-015 | Low | Resolved | Documentation & comments | `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs:16-21`, `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaServerOptions.cs:21-26` |
|
||||
| Driver.Galaxy-015 | ~~Medium~~ Low (re-triaged 2026-05-23) | Resolved | ~~Security~~ Documentation & comments (re-triaged 2026-05-23) | `libs/MxGateway.Client.dll`, `libs/MxGateway.Contracts.dll`, `libs/README.md` |
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
# Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml
|
||||
# to spin four host containers (admin-a, admin-b, driver-a, driver-b) from a single image —
|
||||
# Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them.
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
WORKDIR /src
|
||||
COPY . .
|
||||
RUN dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
RUN dotnet publish src/Server/ZB.MOM.WW.OtOpcUa.Host/ZB.MOM.WW.OtOpcUa.Host.csproj \
|
||||
-c Release -o /app --no-restore
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
|
||||
WORKDIR /app
|
||||
COPY --from=build /app ./
|
||||
|
||||
EXPOSE 9000
|
||||
EXPOSE 4053
|
||||
EXPOSE 4840
|
||||
|
||||
ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"]
|
||||
@@ -0,0 +1,112 @@
|
||||
# docker-dev
|
||||
|
||||
Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **three isolated Akka clusters** + SQL Server + OpenLDAP + Traefik on the same Compose network. All three clusters share the single `OtOpcUa` ConfigDb — multi-tenancy is enforced by per-row `ServerCluster.ClusterId` scoping. Akka.Cluster gossip stays isolated between meshes because their seed-node lists are disjoint, even though they share the same system name `otopcua`.
|
||||
|
||||
## Stack
|
||||
|
||||
### Shared infrastructure
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all three clusters | host `14330` → container `1433` |
|
||||
| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8089` |
|
||||
|
||||
Authentication runs in `DevStubMode` — every host container has `Authentication__Ldap__DevStubMode=true` set, so the LDAP service is not part of the dev compose right now (the `bitnami/openldap:2.6` image was retired and the legacy tag crashes mid-setup with exit 68). Any non-empty username/password signs in as `FleetAdmin`. To restore a real LDAP service, drop the env var and add an `openldap`-compatible image back to compose.
|
||||
|
||||
### Main cluster — split admin/driver roles
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `admin-a` | `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` |
|
||||
| `admin-b` | `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` |
|
||||
| `driver-a` | `OTOPCUA_ROLES=driver` | host `4840` → container `4840` |
|
||||
| `driver-b` | `OTOPCUA_ROLES=driver` | host `4841` → container `4840` |
|
||||
|
||||
### Site A cluster — 2-node fused admin+driver
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `site-a-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=admin,driver`, joins site-a-1 | host `4843` → container `4840` |
|
||||
|
||||
### Site B cluster — 2-node fused admin+driver
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `site-b-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=admin,driver`, joins site-b-1 | host `4845` → container `4840` |
|
||||
|
||||
All containers bind Akka remoting to port `4053` inside their own network namespace; the `PublicHostname` of each matches its Compose service name. Akka mesh isolation is enforced purely by disjoint seed lists. Configuration-side isolation is enforced by `ServerCluster.ClusterId` — see "Multi-tenancy" below.
|
||||
|
||||
## Multi-tenancy
|
||||
|
||||
All eight host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three Akka meshes: each Akka cluster maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
|
||||
A one-shot `cluster-seed` Compose service (image `mcr.microsoft.com/mssql-tools`) waits for SQL + the EF auto-migration to complete and then INSERTs the rows below. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
|
||||
| Akka mesh | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
|---|---|---|
|
||||
| Main | `MAIN` | `driver-a`, `driver-b` (OPC UA publishers) |
|
||||
| Site A | `SITE-A` | `site-a-1`, `site-a-2` |
|
||||
| Site B | `SITE-B` | `site-b-1`, `site-b-2` |
|
||||
|
||||
`ClusterNode` is the table for **OPC UA-publishing nodes** (not every Akka cluster member), which is why the main cluster's `admin-a` / `admin-b` don't get rows — they're control-plane-only.
|
||||
|
||||
Each `ClusterNode.NodeId` matches the node's `Cluster__PublicHostname` env value (Compose service name) — that's the lookup the runtime uses to resolve its own membership. `ApplicationUri` follows the `urn:OtOpcUa:<NodeId>` convention.
|
||||
|
||||
The SQL lives at `seed/seed-clusters.sql`; the wait-and-apply wrapper lives at `seed/entrypoint.sh`. To re-seed manually:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml run --rm cluster-seed
|
||||
```
|
||||
|
||||
### Galaxy / MxAccess gateway
|
||||
|
||||
The seed also pre-creates a `SystemPlatform` Namespace + a `GalaxyMxGateway` DriverInstance in the MAIN cluster pointing at `http://10.100.0.48:5120`. The API key is resolved from the `GALAXY_MXGW_API_KEY` env var set on every driver-role container in compose; override via `GALAXY_MXGW_API_KEY=... docker compose up -d` to swap keys without editing the compose file.
|
||||
|
||||
The DriverHost actor doesn't spawn drivers from raw DriverInstance rows on its own — the v2 deploy lifecycle requires a *sealed Deployment* before drivers materialise. After first bring-up, sign in to the Admin UI and click **Deploy current configuration** on `/deployments` to compose the seeded rows into an artifact and dispatch it. The Galaxy driver instance will start its gRPC connection to the gateway on the next deploy ack.
|
||||
|
||||
## Bring up
|
||||
|
||||
```bash
|
||||
# from the repo root
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
# wait ~20 seconds for SQL to come up + all three clusters to form
|
||||
|
||||
open http://localhost # main cluster admin UI
|
||||
open http://site-a.localhost # site A admin UI
|
||||
open http://site-b.localhost # site B admin UI
|
||||
open http://localhost:8089 # Traefik dashboard
|
||||
```
|
||||
|
||||
On macOS, `*.localhost` resolves to `127.0.0.1` automatically. On Linux add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't.
|
||||
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache.
|
||||
|
||||
## Auth (dev only)
|
||||
|
||||
`Authentication__Ldap__DevStubMode=true` is set on every host container, so any non-empty username/password signs in as a `FleetAdmin` user without contacting an LDAP server. **Do not** ship this configuration to production — set `DevStubMode=false` and wire a real LDAP backend before any non-dev deployment.
|
||||
|
||||
## Tear down
|
||||
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across restarts.
|
||||
|
||||
## Failover smoke
|
||||
|
||||
1. Watch the Traefik dashboard at `http://localhost:8089`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it.
|
||||
|
||||
## Notes
|
||||
|
||||
- This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings.
|
||||
- The OPC UA driver endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (driver-a), `opc.tcp://localhost:4841` (driver-b)
|
||||
- Site A: `opc.tcp://localhost:4842` (site-a-1), `opc.tcp://localhost:4843` (site-a-2)
|
||||
- Site B: `opc.tcp://localhost:4844` (site-b-1), `opc.tcp://localhost:4845` (site-b-2)
|
||||
- Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success.
|
||||
@@ -0,0 +1,261 @@
|
||||
# docker-dev/ — Mac-friendly multi-cluster fleet for v2 development + manual UI exercise.
|
||||
#
|
||||
# Stack (3 separate Akka clusters — all share the single `OtOpcUa` ConfigDb):
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb that all three clusters use
|
||||
# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in
|
||||
#
|
||||
# Main cluster (existing — split-role admin / driver pair on a single Akka mesh):
|
||||
# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (seed)
|
||||
# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a)
|
||||
# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
#
|
||||
# Site A cluster (2-node fused admin+driver):
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=admin,driver, seed = site-a-1
|
||||
#
|
||||
# Site B cluster (2-node fused admin+driver):
|
||||
# site-b-1, site-b-2 OTOPCUA_ROLES=admin,driver, seed = site-b-1
|
||||
#
|
||||
# traefik PathPrefix → main cluster admin-a/admin-b; Host(`site-a.localhost`) →
|
||||
# site-a-*; Host(`site-b.localhost`) → site-b-*. Add the two site hosts to
|
||||
# your /etc/hosts (or rely on macOS `.localhost` auto-resolution).
|
||||
#
|
||||
# Multi-tenancy: ConfigDb is one schema with a `ServerCluster` table; each Akka cluster
|
||||
# corresponds to a row in it (ClusterId = "MAIN" / "SITE-A" / "SITE-B"), and each node's
|
||||
# `ClusterNode.NodeId` points back at the row that owns it. After first boot, sign in to
|
||||
# any cluster's Admin UI and create the matching ServerCluster + ClusterNode rows via
|
||||
# /clusters and /hosts so the runtime knows what configuration scope applies.
|
||||
#
|
||||
# Akka mesh isolation: same system name "otopcua" + same remoting port 4053 inside each
|
||||
# container's own network namespace, but with disjoint seed-node lists — gossip never
|
||||
# crosses between the three meshes.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
# open http://localhost # main cluster Blazor admin UI
|
||||
# open http://site-a.localhost # site A admin UI
|
||||
# open http://site-b.localhost # site B admin UI
|
||||
# open http://localhost:8089 # Traefik dashboard (8080 is the sister scadalink stack)
|
||||
#
|
||||
# Tear-down: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
|
||||
name: otopcua-dev
|
||||
|
||||
services:
|
||||
|
||||
sql:
|
||||
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||
environment:
|
||||
ACCEPT_EULA: "Y"
|
||||
SA_PASSWORD: "OtOpcUa!Dev123"
|
||||
MSSQL_PID: Developer
|
||||
ports:
|
||||
- "14330:1433"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P 'OtOpcUa!Dev123' -No -Q 'SELECT 1' || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Waits for SQL + the host containers' EF auto-migration, then INSERTs the
|
||||
# three ServerCluster rows and the six ClusterNode rows that scope each Akka
|
||||
# mesh inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
depends_on:
|
||||
sql:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./seed:/seed:ro
|
||||
entrypoint: ["/bin/bash", "/seed/entrypoint.sh"]
|
||||
restart: "no"
|
||||
|
||||
# OpenLDAP was previously here but the bitnami/openldap:2.6 image was retired
|
||||
# (manifest gone) and bitnamilegacy/openldap:2.6 crashes during LDIF setup with
|
||||
# exit 68. For the dev compose every host container now runs with
|
||||
# Authentication__Ldap__DevStubMode=true, so any non-empty username/password
|
||||
# signs in as `FleetAdmin`. Restore a real LDAP service when there's a need
|
||||
# for end-to-end LDAP coverage (the host code path is unchanged).
|
||||
|
||||
admin-a: &otopcua-host
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
image: otopcua-host:dev
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
admin-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
driver-a:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
# Resolved at runtime by GalaxyDriver.ResolveApiKey when a DriverInstance's
|
||||
# Gateway.ApiKeySecretRef = "env:GALAXY_MXGW_API_KEY".
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
driver-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# Shares the OtOpcUa ConfigDb with the main + site-b clusters; multi-tenancy is
|
||||
# enforced by ServerCluster.ClusterId rows (configure via /clusters after boot).
|
||||
# Akka isolation comes from the disjoint seed list (seed = site-a-1).
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4842:4840"
|
||||
|
||||
site-a-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-a-1: { condition: service_started }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4843:4840"
|
||||
|
||||
# ── Site B cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
|
||||
site-b-1:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4844:4840"
|
||||
|
||||
site-b-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-b-1: { condition: service_started }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__DevStubMode: "true"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4845:4840"
|
||||
|
||||
traefik:
|
||||
image: traefik:v3.1
|
||||
command:
|
||||
- --entrypoints.web.address=:80
|
||||
- --providers.file.filename=/etc/traefik/dynamic.yml
|
||||
- --providers.file.watch=true
|
||||
- --api.insecure=true
|
||||
ports:
|
||||
- "9200:80" # host port 9200 → traefik :80 entrypoint (80 conflicts with scadabridge-traefik)
|
||||
- "8089:8080" # 8080 conflicts with the sister scadalink dev stack
|
||||
volumes:
|
||||
- ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro
|
||||
depends_on:
|
||||
- admin-a
|
||||
- admin-b
|
||||
- site-a-1
|
||||
- site-a-2
|
||||
- site-b-1
|
||||
- site-b-2
|
||||
Executable
+48
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
|
||||
# docker-dev cluster-seed entrypoint. Waits for the OtOpcUa ConfigDb schema to
|
||||
# be in place, then applies the idempotent row seed.
|
||||
#
|
||||
# IMPORTANT: this container does NOT run EF migrations — sqlcmd can't execute
|
||||
# the V2 migration script cleanly because it contains CREATE PROCEDURE
|
||||
# statements inside IF NOT EXISTS BEGIN ... END blocks (procs must be the
|
||||
# first statement in their batch). Migrations are owned by the operator:
|
||||
#
|
||||
# dotnet ef database update \
|
||||
# --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \
|
||||
# --startup-project src/Server/ZB.MOM.WW.OtOpcUa.Host
|
||||
#
|
||||
# (with ConnectionStrings__ConfigDb pointing at Server=localhost,14330;...).
|
||||
# Once the schema is in place, restart the cluster-seed container — or just
|
||||
# `docker compose up -d` and the seed will pick up where it left off thanks to
|
||||
# the IF NOT EXISTS guards in seed-clusters.sql.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SQLCMD="/opt/mssql-tools/bin/sqlcmd"
|
||||
SERVER="${SQL_HOST:-sql},1433"
|
||||
USER="${SQL_USER:-sa}"
|
||||
PASS="${SQL_PASSWORD:-OtOpcUa!Dev123}"
|
||||
DB="${SQL_DATABASE:-OtOpcUa}"
|
||||
|
||||
run_sql_in() {
|
||||
local target_db="$1"; shift
|
||||
# -I forces SET QUOTED_IDENTIFIER ON (needed for filtered indexes if you
|
||||
# ever extend this script to touch them).
|
||||
"$SQLCMD" -S "$SERVER" -U "$USER" -P "$PASS" -d "$target_db" -b -h -1 -I "$@"
|
||||
}
|
||||
|
||||
echo "[cluster-seed] waiting for SQL Server to accept connections..."
|
||||
until run_sql_in master -Q "SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 2
|
||||
done
|
||||
echo "[cluster-seed] SQL Server up."
|
||||
|
||||
echo "[cluster-seed] waiting for ${DB} database + dbo.ServerCluster table (operator must run dotnet ef database update)..."
|
||||
until run_sql_in "$DB" -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 3
|
||||
done
|
||||
echo "[cluster-seed] schema ready."
|
||||
|
||||
echo "[cluster-seed] applying seed-clusters.sql (ServerCluster + ClusterNode rows)..."
|
||||
run_sql_in "$DB" -i /seed/seed-clusters.sql
|
||||
echo "[cluster-seed] done."
|
||||
@@ -0,0 +1,195 @@
|
||||
-- docker-dev cluster seed. Idempotent — safe to re-run on every `docker compose up`.
|
||||
--
|
||||
-- Populates:
|
||||
-- ServerCluster MAIN, SITE-A, SITE-B
|
||||
-- ClusterNode driver-a, driver-b → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
--
|
||||
-- ServerCluster.NodeCount + RedundancyMode are coupled by CHECK constraint:
|
||||
-- NodeCount=1 ⇒ RedundancyMode='None'
|
||||
-- NodeCount=2 ⇒ RedundancyMode∈('Warm','Hot')
|
||||
--
|
||||
-- Each ClusterNode.ApplicationUri MUST be globally unique (UX_ClusterNode_ApplicationUri).
|
||||
-- Convention: urn:OtOpcUa:<NodeId>.
|
||||
--
|
||||
-- Host = Compose service name (resolves inside the otopcua-dev network).
|
||||
-- OpcUaPort stays at the container-internal 4840; the host-side port mapping is in
|
||||
-- docker-compose.yml ports: blocks and is irrelevant to ClusterNode rows.
|
||||
|
||||
SET NOCOUNT ON;
|
||||
SET XACT_ABORT ON;
|
||||
|
||||
BEGIN TRANSACTION;
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ServerCluster
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'MAIN')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('MAIN', 'Main cluster', 'zb', 'docker-dev',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — admin-a/admin-b control-plane, driver-a/driver-b OPC UA publishers.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('SITE-A', 'Site A', 'zb', 'site-a',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('SITE-B', 'Site B', 'zb', 'site-b',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — main cluster OPC UA publishers
|
||||
--
|
||||
-- NodeId is "<compose-service>:4053" so it matches what ClusterRoleInfo +
|
||||
-- ConfigPublishCoordinator derive from Akka.Cluster.Get(system).State.Members
|
||||
-- (member.Address.Host:Port). NodeDeploymentState.NodeId is FK-bound to
|
||||
-- ClusterNode.NodeId; mismatched values cause FK 547 on deploy.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-a:4053', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-b:4053', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site A
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-1:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-a-1:4053', 'SITE-A', 'site-a-1', 4840, 8081, 'urn:OtOpcUa:site-a-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-2:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-a-2:4053', 'SITE-A', 'site-a-2', 4840, 8081, 'urn:OtOpcUa:site-a-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site B
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-1:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-b-1:4053', 'SITE-B', 'site-b-1', 4840, 8081, 'urn:OtOpcUa:site-b-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-2:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-b-2:4053', 'SITE-B', 'site-b-2', 4840, 8081, 'urn:OtOpcUa:site-b-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Galaxy MxAccess gateway — MAIN cluster
|
||||
--
|
||||
-- Namespace.Kind=SystemPlatform is required for Galaxy/MXAccess data per
|
||||
-- decision #107; raw equipment drivers use Equipment. DriverInstance points
|
||||
-- at the external mxaccessgw process. The driver code lives in this repo
|
||||
-- (.NET 10, cross-platform); only the gateway worker needs Windows.
|
||||
--
|
||||
-- ApiKeySecretRef = env:GALAXY_MXGW_API_KEY → resolved at runtime by
|
||||
-- GalaxyDriver.ResolveApiKey. The env var is set on every driver-role
|
||||
-- container in docker-compose.yml.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.Namespace WHERE NamespaceId = 'MAIN-galaxy')
|
||||
INSERT INTO dbo.Namespace
|
||||
(NamespaceRowId, NamespaceId, ClusterId, Kind, NamespaceUri, Enabled, Notes)
|
||||
VALUES
|
||||
(NEWID(), 'MAIN-galaxy', 'MAIN', 'SystemPlatform',
|
||||
'urn:zb:docker-dev:galaxy', 1,
|
||||
'docker-dev seed — Galaxy / MXAccess namespace served by the MAIN cluster.');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.DriverInstance WHERE DriverInstanceId = 'MAIN-galaxy-mxgw')
|
||||
INSERT INTO dbo.DriverInstance
|
||||
(DriverInstanceRowId, DriverInstanceId, ClusterId, NamespaceId, Name, DriverType, Enabled, DriverConfig)
|
||||
VALUES
|
||||
(NEWID(), 'MAIN-galaxy-mxgw', 'MAIN', 'MAIN-galaxy',
|
||||
'MxAccess gateway (10.100.0.48:5120)', 'GalaxyMxGateway', 1,
|
||||
N'{
|
||||
"Gateway": {
|
||||
"Endpoint": "http://10.100.0.48:5120",
|
||||
"ApiKeySecretRef": "env:GALAXY_MXGW_API_KEY",
|
||||
"UseTls": false,
|
||||
"ConnectTimeoutSeconds": 10,
|
||||
"DefaultCallTimeoutSeconds": 30
|
||||
},
|
||||
"MxAccess": {
|
||||
"ClientName": "OtOpcUa-MAIN-docker-dev",
|
||||
"PublishingIntervalMs": 1000
|
||||
},
|
||||
"Repository": {
|
||||
"DiscoverPageSize": 5000,
|
||||
"WatchDeployEvents": true
|
||||
},
|
||||
"Reconnect": {
|
||||
"InitialBackoffMs": 500,
|
||||
"MaxBackoffMs": 30000,
|
||||
"ReplayOnSessionLost": true
|
||||
}
|
||||
}');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Galaxy test tags — TestMachine_001.TestAlarm001..003
|
||||
--
|
||||
-- SystemPlatform-namespace tags have EquipmentId=NULL and use FolderPath +
|
||||
-- Name to address the MXAccess item. The Galaxy driver subscribes via the
|
||||
-- "FolderPath.Name" MXAccess reference form; OPC UA browse path is the
|
||||
-- equivalent "FolderPath/Name" under the SystemPlatform namespace.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.Tag WHERE TagId = 'MAIN-galaxy-TestMachine_001-TestAlarm001')
|
||||
INSERT INTO dbo.Tag
|
||||
(TagRowId, TagId, DriverInstanceId, DeviceId, EquipmentId, Name, FolderPath, DataType, AccessLevel, WriteIdempotent, PollGroupId, TagConfig)
|
||||
VALUES
|
||||
(NEWID(), 'MAIN-galaxy-TestMachine_001-TestAlarm001', 'MAIN-galaxy-mxgw', NULL, NULL,
|
||||
'TestAlarm001', 'TestMachine_001', 'Boolean', 0, 0, NULL, N'{}');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.Tag WHERE TagId = 'MAIN-galaxy-TestMachine_001-TestAlarm002')
|
||||
INSERT INTO dbo.Tag
|
||||
(TagRowId, TagId, DriverInstanceId, DeviceId, EquipmentId, Name, FolderPath, DataType, AccessLevel, WriteIdempotent, PollGroupId, TagConfig)
|
||||
VALUES
|
||||
(NEWID(), 'MAIN-galaxy-TestMachine_001-TestAlarm002', 'MAIN-galaxy-mxgw', NULL, NULL,
|
||||
'TestAlarm002', 'TestMachine_001', 'Boolean', 0, 0, NULL, N'{}');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.Tag WHERE TagId = 'MAIN-galaxy-TestMachine_001-TestAlarm003')
|
||||
INSERT INTO dbo.Tag
|
||||
(TagRowId, TagId, DriverInstanceId, DeviceId, EquipmentId, Name, FolderPath, DataType, AccessLevel, WriteIdempotent, PollGroupId, TagConfig)
|
||||
VALUES
|
||||
(NEWID(), 'MAIN-galaxy-TestMachine_001-TestAlarm003', 'MAIN-galaxy-mxgw', NULL, NULL,
|
||||
'TestAlarm003', 'TestMachine_001', 'Boolean', 0, 0, NULL, N'{}');
|
||||
|
||||
COMMIT TRANSACTION;
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Summary (logged by sqlcmd output)
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
SELECT ClusterId, Name, NodeCount, RedundancyMode FROM dbo.ServerCluster ORDER BY ClusterId;
|
||||
SELECT NodeId, ClusterId, Host, OpcUaPort, ApplicationUri, ServiceLevelBase
|
||||
FROM dbo.ClusterNode ORDER BY ClusterId, NodeId;
|
||||
SELECT NamespaceId, ClusterId, Kind, NamespaceUri FROM dbo.Namespace ORDER BY ClusterId, NamespaceId;
|
||||
SELECT DriverInstanceId, ClusterId, DriverType, NamespaceId, Name
|
||||
FROM dbo.DriverInstance ORDER BY ClusterId, DriverInstanceId;
|
||||
SELECT TagId, DriverInstanceId, FolderPath, Name, DataType FROM dbo.Tag ORDER BY DriverInstanceId, FolderPath, Name;
|
||||
@@ -0,0 +1,81 @@
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes three
|
||||
# Akka clusters that share the Compose network:
|
||||
#
|
||||
# - Main cluster (default): PathPrefix(`/`) → admin-a / admin-b.
|
||||
# - Site A cluster: Host(`site-a.localhost`) → site-a-1 / site-a-2.
|
||||
# - Site B cluster: Host(`site-b.localhost`) → site-b-1 / site-b-2.
|
||||
#
|
||||
# Host-header rules are more specific than PathPrefix, so they win over the
|
||||
# default router for the site hostnames automatically — no priority field needed.
|
||||
|
||||
http:
|
||||
routers:
|
||||
otopcua-admin:
|
||||
entryPoints: ["web"]
|
||||
rule: "PathPrefix(`/`)"
|
||||
service: otopcua-admin
|
||||
|
||||
otopcua-site-a:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-a.localhost`)"
|
||||
service: otopcua-site-a
|
||||
|
||||
otopcua-site-b:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-b.localhost`)"
|
||||
service: otopcua-site-b
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-a:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-a-1:9000"
|
||||
- url: "http://site-a-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-b:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-b-1:9000"
|
||||
- url: "http://site-b-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
@@ -1,6 +1,6 @@
|
||||
# Address Space
|
||||
|
||||
Each driver's browsable subtree is built by streaming nodes from the driver's `ITagDiscovery.DiscoverAsync` implementation into an `IAddressSpaceBuilder`. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) owns the shared orchestration; `DriverNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`) implements `IAddressSpaceBuilder` against the OPC Foundation stack's `CustomNodeManager2`. The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
Each driver's browsable subtree is built by streaming nodes from the driver's `ITagDiscovery.DiscoverAsync` implementation into an `IAddressSpaceBuilder`. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) owns the shared orchestration; in v2 the SDK-driven materialization is handled by `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`) fed via `SdkAddressSpaceSink` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs`). The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
|
||||
## Driver root folder
|
||||
|
||||
@@ -66,7 +66,7 @@ Drivers that implement `IRediscoverable` fire `OnRediscoveryNeeded` when their b
|
||||
## Key source files
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — orchestration + `CapturingBuilder`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — OPC UA materialization (`IAddressSpaceBuilder` impl + `NestedBuilder`)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`, `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — OPC UA materialization (write-only sink fed by the actor system)
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IAddressSpaceBuilder.cs` — builder contract
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs` — driver discovery capability
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverAttributeInfo.cs` — per-attribute descriptor
|
||||
|
||||
@@ -15,9 +15,10 @@ historical reference.
|
||||
| **Galaxy sub-attribute fallback** | `IWritable` writes to `$Alarm*` sub-attributes | gateway data subscription → driver `OnDataChange` → `DriverNodeManager` ConditionSink → `AlarmConditionService` |
|
||||
| **Scripted alarms** | `Phase7EngineComposer` | server-side script evaluator → `Phase7EngineComposer.RouteToHistorianAsync` + `AlarmConditionService` |
|
||||
|
||||
All three converge on `AlarmConditionService` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Alarms/AlarmConditionService.cs`),
|
||||
which owns the OPC UA Part 9 state machine and dispatches transitions
|
||||
to the OPC UA condition node managers. Driver-native transitions take
|
||||
All three converge on the alarm-state actor — in v2 the OPC UA Part 9 state
|
||||
machine lives inside `ScriptedAlarmActor`
|
||||
(`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`),
|
||||
which dispatches transitions to the OPC UA condition node managers. Driver-native transitions take
|
||||
precedence over sub-attribute synthesis when both arrive for the same
|
||||
condition — the dedup logic prefers the richer driver-native record
|
||||
because it carries the full operator + raise-time + category metadata
|
||||
|
||||
+40
-16
@@ -149,53 +149,77 @@ otopcua-cli browse -u opc.tcp://localhost:4840/OtOpcUa -U admin -P admin123 -r -
|
||||
|
||||
### subscribe
|
||||
|
||||
Monitors a node for value changes using OPC UA subscriptions. Prints each data change notification with timestamp, value, and status code. Runs until Ctrl+C, then unsubscribes and disconnects cleanly.
|
||||
Monitors a node (or every Variable in its subtree) for value changes using OPC UA subscriptions.
|
||||
Prints each data-change notification with timestamp, value, and status code, then prints a
|
||||
summary on exit. Exits on Ctrl+C, or automatically after `--duration` seconds.
|
||||
|
||||
```bash
|
||||
# Subscribe to a single node
|
||||
otopcua-cli subscribe -u opc.tcp://localhost:4840 -n "ns=2;s=MyNode" -i 500
|
||||
|
||||
# Browse a subtree and subscribe to every Variable, run for 60 seconds, write the summary to disk
|
||||
otopcua-cli subscribe -u opc.tcp://localhost:4840 -n "ns=3;s=ZB" -r --max-depth 4 \
|
||||
--duration 60 --quiet --summary-file C:\Temp\subscribe-summary.txt
|
||||
```
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `-n` / `--node` | Node ID to monitor (required) |
|
||||
| `-i` / `--interval` | Sampling/publishing interval in milliseconds (default: 1000) |
|
||||
| `-n` / `--node` | Node ID to monitor (required). When `--recursive` is set, this is the browse root. |
|
||||
| `-i` / `--interval` | Sampling interval in milliseconds (default: 1000) |
|
||||
| `-r` / `--recursive` | Browse recursively from `--node` and subscribe to every Variable found |
|
||||
| `--max-depth` | Maximum recursion depth when `--recursive` is set (default: 10) |
|
||||
| `-q` / `--quiet` | Suppress per-update output; only print the final summary |
|
||||
| `--duration` | Auto-exit after N seconds and print the summary (0 = run until Ctrl+C, default: 0) |
|
||||
| `--summary-file` | Also write the summary to this file path on exit |
|
||||
|
||||
#### Summary buckets
|
||||
|
||||
The summary prints per-node counts across these buckets:
|
||||
|
||||
- **Ever went BAD during window** — node received at least one notification whose status was not Good.
|
||||
- **NEVER went bad (suspect)** — node received at least one notification and every one was Good.
|
||||
- **Last status GOOD / NOT-GOOD** — final observed status across nodes that received any update.
|
||||
- **No update received at all** — node was subscribed but no notification arrived during the window.
|
||||
|
||||
### historyread
|
||||
|
||||
Reads historical data from a node. Supports raw history reads and aggregate (processed) history reads.
|
||||
`--start` and `--end` are parsed with `CultureInfo.InvariantCulture` and treated as UTC; supply
|
||||
them in ISO 8601 UTC form (`YYYY-MM-DDTHH:MM:SSZ`) for unambiguous behaviour across hosts.
|
||||
|
||||
```bash
|
||||
# Raw history
|
||||
otopcua-cli historyread -u opc.tcp://localhost:4840/OtOpcUa \
|
||||
-n "ns=1;s=TestMachine_001.TestHistoryValue" \
|
||||
--start "2026-03-25" --end "2026-03-30"
|
||||
--start "2026-03-25T00:00:00Z" --end "2026-03-30T00:00:00Z"
|
||||
|
||||
# Aggregate: 1-hour average
|
||||
otopcua-cli historyread -u opc.tcp://localhost:4840/OtOpcUa \
|
||||
-n "ns=1;s=TestMachine_001.TestHistoryValue" \
|
||||
--start "2026-03-25" --end "2026-03-30" \
|
||||
--start "2026-03-25T00:00:00Z" --end "2026-03-30T00:00:00Z" \
|
||||
--aggregate Average --interval 3600000
|
||||
```
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `-n` / `--node` | Node ID to read history for (required) |
|
||||
| `--start` | Start time, ISO 8601 or date string (default: 24 hours ago) |
|
||||
| `--end` | End time, ISO 8601 or date string (default: now) |
|
||||
| `--start` | Start time in ISO 8601 UTC format, e.g. `2026-01-15T08:00:00Z` (default: 24 hours ago) |
|
||||
| `--end` | End time in ISO 8601 UTC format, e.g. `2026-01-15T09:00:00Z` (default: now) |
|
||||
| `--max` | Maximum number of values (default: 1000) |
|
||||
| `--aggregate` | Aggregate function: Average, Minimum, Maximum, Count, Start, End |
|
||||
| `--aggregate` | Aggregate function: Average, Minimum, Maximum, Count, Start, End, StandardDeviation |
|
||||
| `--interval` | Processing interval in milliseconds for aggregates (default: 3600000) |
|
||||
|
||||
#### Aggregate mapping
|
||||
|
||||
| Name | OPC UA Node ID |
|
||||
|------|---------------|
|
||||
| `Average` | `AggregateFunction_Average` |
|
||||
| `Minimum` | `AggregateFunction_Minimum` |
|
||||
| `Maximum` | `AggregateFunction_Maximum` |
|
||||
| `Count` | `AggregateFunction_Count` |
|
||||
| `Start` | `AggregateFunction_Start` |
|
||||
| `End` | `AggregateFunction_End` |
|
||||
| Name | Aliases | OPC UA Node ID |
|
||||
|------|---------|---------------|
|
||||
| `Average` | `avg` | `AggregateFunction_Average` |
|
||||
| `Minimum` | `min` | `AggregateFunction_Minimum` |
|
||||
| `Maximum` | `max` | `AggregateFunction_Maximum` |
|
||||
| `Count` | | `AggregateFunction_Count` |
|
||||
| `Start` | `first` | `AggregateFunction_Start` |
|
||||
| `End` | `last` | `AggregateFunction_End` |
|
||||
| `StandardDeviation` | `stddev`, `stdev` | `AggregateFunction_StandardDeviationSample` |
|
||||
|
||||
### alarms
|
||||
|
||||
|
||||
+1
-1
@@ -198,7 +198,7 @@ All times are in UTC. Invalid input turns red on blur.
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| Aggregate | Raw (default), Average, Minimum, Maximum, Count, Start, End |
|
||||
| Aggregate | Raw (default), Average, Minimum, Maximum, Count, Start, End, Standard Deviation |
|
||||
| Interval (ms) | Processing interval for aggregate queries (shown only for aggregates) |
|
||||
| Max Values | Maximum number of raw values to return (default 1000) |
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ Static drivers (Modbus, S7, AB CIP, AB Legacy, FOCAS) do not implement `IRedisco
|
||||
|
||||
Tag-set changes authored in the Admin UI (UNS edits, CSV imports, driver-config edits) accumulate in a draft generation and commit via `sp_PublishGeneration`. The delta between the currently-published generation and the proposed next one is computed by `sp_ComputeGenerationDiff`, which drives:
|
||||
|
||||
- The **DiffViewer** in Admin (`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Clusters/DiffViewer.razor`) so operators can preview what will change before clicking Publish.
|
||||
- The publish-preview surface in the Admin UI (`src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Deployments.razor`, backed by `AdminOperationsClient`) so operators can preview what will change before clicking Publish.
|
||||
- The 409-on-stale-draft flow (decision #161) — a UNS drag-reorder preview carries a `DraftRevisionToken` so Confirm returns `409 Conflict / refresh-required` if the draft advanced between preview and commit.
|
||||
|
||||
After publish, the server's generation applier invokes `IDriver.ReinitializeAsync(driverConfigJson, ct)` on every driver whose `DriverInstance.DriverConfig` row changed in the new generation. Reinitialize is the in-process recovery path for Tier A/B drivers; if it fails the driver is marked `DriverState.Faulted` and its nodes go Bad quality — but the server process stays running. See `docs/v2/driver-stability.md`.
|
||||
@@ -64,6 +64,7 @@ Subscriptions for unchanged references stay live across rebuilds — their ref-c
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IRediscoverable.cs` — backend-change capability
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — discovery orchestration
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriver.cs` — `ReinitializeAsync` contract
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/GenerationService.cs` — publish-flow driver
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs` — publish-flow driver
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs` — cluster singleton invoked by the Admin UI's `AdminOperationsClient`
|
||||
- `docs/v2/config-db-schema.md` — `sp_PublishGeneration` + `sp_ComputeGenerationDiff`
|
||||
- `docs/v2/admin-ui.md` — DiffViewer + draft-revision-token flow
|
||||
|
||||
+9
-8
@@ -1,13 +1,13 @@
|
||||
# OPC UA Server
|
||||
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs`) hosts the OPC UA stack and exposes one browsable subtree per registered driver. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs`) hosts the OPC UA stack and exposes one browsable subtree per registered driver. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
|
||||
## Composition
|
||||
|
||||
`OtOpcUaServer` subclasses the OPC Foundation `StandardServer` and wires:
|
||||
|
||||
- A `DriverHost` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs`) which registers drivers and holds the per-instance `IDriver` references.
|
||||
- One `DriverNodeManager` per registered driver (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`), constructed in `CreateMasterNodeManager`. Each manager owns its own namespace URI (`urn:OtOpcUa:{DriverInstanceId}`) and exposes the driver as a subtree under the standard `Objects` folder.
|
||||
- One `DriverNodeManager` per registered driver (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`), constructed in `CreateMasterNodeManager`. Each manager owns its own namespace URI (`urn:OtOpcUa:{DriverInstanceId}`) and exposes the driver as a subtree under the standard `Objects` folder.
|
||||
- A `CapabilityInvoker` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs`) per driver instance, keyed on `(DriverInstanceId, HostName, DriverCapability)` against the shared `DriverResiliencePipelineBuilder`. Every Read/Write/Discovery/Subscribe/HistoryRead/AlarmSubscribe call on the driver flows through this invoker so the Polly pipeline (retry / timeout / breaker / bulkhead) applies. The OTOPCUA0001 Roslyn analyzer enforces the wrapping at compile time.
|
||||
- An `IUserAuthenticator` (LDAP in production, injected stub in tests) for `UserName` token validation in the `ImpersonateUser` hook.
|
||||
- Optional `AuthorizationGate` + `NodeScopeResolver` (Phase 6.2) that sit in front of every dispatch call. In lax mode the gate passes through when the identity lacks LDAP groups so existing integration tests keep working; strict mode (`Authorization:StrictMode = true`) denies those cases.
|
||||
@@ -50,7 +50,7 @@ The host name fed to the invoker comes from `IPerCallHostResolver.ResolveHost(fu
|
||||
|
||||
## Redundancy
|
||||
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyCoordinator` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Redundancy/`). Standard OPC UA redundancy nodes (`Server/ServerRedundancy/RedundancySupport`, `ServerUriArray`, `Server/ServiceLevel`) are populated on startup; `ServiceLevel` recomputes whenever any driver's `DriverHealth` changes. The apply-lease mechanism prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyStateActor` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Redundancy/`). Standard OPC UA redundancy nodes (`Server/ServerRedundancy/RedundancySupport`, `ServerUriArray`, `Server/ServiceLevel`) are populated on startup; `ServiceLevel` recomputes whenever any driver's `DriverHealth` changes. The apply-lease mechanism prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
|
||||
## Server class hierarchy
|
||||
|
||||
@@ -79,10 +79,11 @@ Certificate stores default to `%LOCALAPPDATA%\OPC Foundation\pki\` (directory-ba
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs` — `StandardServer` subclass + `ImpersonateUser` hook
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — per-driver `CustomNodeManager2` + dispatch surface
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs` — `StandardServer` subclass
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle + `ImpersonateUser` hook
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — SDK node manager + write-only address-space sink
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — `IOpcUaAddressSpaceSink` adapter the actor system pushes into
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — per-driver discovery + dispatch surface
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs` — driver registration
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — Polly pipeline entry point
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — Phase 6.2 permission trie + evaluator
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` — stack-to-evaluator bridge
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`)
|
||||
|
||||
+7
-4
@@ -9,10 +9,13 @@ The project was originally called **LmxOpcUa** (a single-driver Galaxy/MXAccess
|
||||
|
||||
## Platform overview
|
||||
|
||||
- **Core** owns the OPC UA stack, address space, session/security/subscription machinery.
|
||||
> **v2 (2026-05-26):** the separate `OtOpcUa.Server` + `OtOpcUa.Admin` services fused into a single role-gated `OtOpcUa.Host` binary, joined by an Akka.NET cluster. See [v2 design](plans/2026-05-26-akka-hosting-alignment-design.md) for the architectural decision.
|
||||
|
||||
- **Core** owns shared abstractions (driver capability contracts, scripting, virtual tags, alarm historian).
|
||||
- **Drivers** plug in via capability interfaces in `ZB.MOM.WW.OtOpcUa.Core.Abstractions`: `IDriver`, `IReadable`, `IWritable`, `ITagDiscovery`, `ISubscribable`, `IHostConnectivityProbe`, `IAlarmSource`, `IHistoryProvider`, `IPerCallHostResolver`. Each driver opts into whichever it supports.
|
||||
- **Server** is the OPC UA endpoint process (net10, AnyCPU). Hosts every driver in-process. The Galaxy driver reaches MXAccess via gRPC to a separately-installed **mxaccessgw** sidecar (sibling repo); it is no longer hosted from this repo.
|
||||
- **Admin** is the Blazor Server operator UI (net10, x64). Owns the Config DB draft/publish flow, ACL + role-grant authoring, fleet status + `/metrics` scrape endpoint.
|
||||
- **Host** (`src/Server/ZB.MOM.WW.OtOpcUa.Host`) is the single fused binary (.NET 10, AnyCPU). `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + control-plane singletons), `driver` (OPC UA endpoint + per-node actors), or both. See [ServiceHosting.md](ServiceHosting.md).
|
||||
- **Cluster + ControlPlane + Runtime + AdminUI + Security** sit between Core and Host. The cluster glues per-node actors into one logical fleet; the control-plane singletons (deploy coordinator, audit writer, redundancy state) live on the admin role-leader. See [Redundancy.md](Redundancy.md).
|
||||
- The Galaxy driver still reaches MXAccess via gRPC to a separately-installed **mxaccessgw** sidecar (sibling repo).
|
||||
|
||||
## Where to find what
|
||||
|
||||
@@ -56,7 +59,7 @@ For Modbus / S7 / AB CIP / AB Legacy / TwinCAT / FOCAS / OPC UA Client specifics
|
||||
| [security.md](security.md) | Transport security profiles, LDAP auth, ACL trie, role grants, OTOPCUA0001 analyzer |
|
||||
| [Redundancy.md](Redundancy.md) | `RedundancyCoordinator`, `ServiceLevelCalculator`, apply-lease, Prometheus metrics |
|
||||
| [Reservations.md](Reservations.md) | Fleet-wide ZTag / SAPID external-ID reservations — publish-time claim, release flow |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Two-process deploy (Server + Admin) install/uninstall, plus the optional `OtOpcUaWonderwareHistorian` sidecar |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Single fused `OtOpcUa.Host` binary install/uninstall with `OTOPCUA_ROLES` gating, plus the optional `OtOpcUaWonderwareHistorian` sidecar |
|
||||
| [StatusDashboard.md](StatusDashboard.md) | Pointer — superseded by [v2/admin-ui.md](v2/admin-ui.md) |
|
||||
|
||||
### Client tooling
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Read/Write Operations
|
||||
|
||||
`DriverNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`) wires the OPC UA stack's per-variable `OnReadValue` and `OnWriteValue` hooks to each driver's `IReadable` and `IWritable` capabilities. Every dispatch flows through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers.
|
||||
`GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) wires the OPC UA stack's per-variable `OnReadValue` and `OnWriteValue` hooks to each driver's `IReadable` and `IWritable` capabilities. Every dispatch flows through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers.
|
||||
|
||||
## Driver vs virtual dispatch
|
||||
|
||||
@@ -60,8 +60,7 @@ Per decision #12, exceptions in the driver's capability call are logged and conv
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — `OnReadValue` / `OnWriteValue` hooks
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/WriteAuthzPolicy.cs` — classification-to-role policy
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` — Phase 6.2 trie gate
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — `OnReadValue` / `OnWriteValue` hooks
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`) that gates Read/Write/Subscribe per the session's resolved LDAP groups
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — `ExecuteAsync` / `ExecuteWriteAsync`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IReadable.cs`, `IWritable.cs`, `WriteIdempotentAttribute.cs`
|
||||
|
||||
+78
-72
@@ -1,103 +1,109 @@
|
||||
# Redundancy
|
||||
# Redundancy (v2)
|
||||
|
||||
## Overview
|
||||
|
||||
OtOpcUa supports OPC UA **non-transparent** warm/hot redundancy. Two (or more) OtOpcUa Server processes run side-by-side, share the same Config DB, the same driver backends (Galaxy ZB, MXAccess runtime, remote PLCs), and advertise the same OPC UA node tree. Each process owns a distinct `ApplicationUri`; OPC UA clients see both endpoints via the standard `ServerUriArray` and pick one based on the `ServiceLevel` that each server publishes.
|
||||
OtOpcUa supports OPC UA **non-transparent** warm/hot redundancy. Two or more `OtOpcUa.Host` processes run side-by-side, share the same Config DB, and join the same Akka.NET cluster. Each process owns a distinct `ApplicationUri`; OPC UA clients discover both endpoints by reading `Server.ServerArray` (NodeId `i=2254`) on either node and pick one based on the `ServiceLevel` byte that each server publishes.
|
||||
|
||||
The redundancy surface lives in `src/Server/ZB.MOM.WW.OtOpcUa.Server/Redundancy/`:
|
||||
> **Discovery surface.** The `ServerArray` path on the `Server` object is what each node populates with self + peer `ApplicationUri`s — see `OpcUaApplicationHost.PopulateServerArray` and the per-node `PeerApplicationUris` option below. The redundancy-object-type `ServerUriArray` proper (a child of `Server.ServerRedundancy`) remains deferred pending an SDK object-type upgrade; clients should read `Server.ServerArray` for peer discovery today.
|
||||
|
||||
| Class | Role |
|
||||
|---|---|
|
||||
| `RedundancyCoordinator` | Process-singleton; owns the current `RedundancyTopology` loaded from the `ClusterNode` table. `RefreshAsync` re-reads after `sp_PublishGeneration` so operator role swaps take effect without a process restart. CAS-style swap (`Interlocked.Exchange`) means readers always see a coherent snapshot. |
|
||||
| `RedundancyTopology` | Immutable `(ClusterId, Self, Peers, ServerUriArray, ValidityFlags)` snapshot. |
|
||||
| `ApplyLeaseRegistry` | Tracks in-progress `sp_PublishGeneration` apply leases keyed on `(ConfigGenerationId, PublishRequestId)`. `await using` the disposable scope guarantees every exit path (success / exception / cancellation) decrements the lease; a stale-lease watchdog force-closes any lease older than `ApplyMaxDuration` (default 10 minutes) so a crashed publisher can't pin the node at `PrimaryMidApply`. |
|
||||
| `PeerReachabilityTracker` | Maintains last-known reachability for each peer node over two independent probes — OPC UA ping and HTTP `/healthz`. Both must succeed for `peerReachable = true`. |
|
||||
| `RecoveryStateManager` | Gates transitions out of the `Recovering*` bands; requires dwell + publish-witness satisfaction before allowing a return to nominal. |
|
||||
| `ServiceLevelCalculator` | Pure function `(role, selfHealthy, peerUa, peerHttp, applyInProgress, recoveryDwellMet, topologyValid, operatorMaintenance) → byte`. |
|
||||
| `RedundancyStatePublisher` | Orchestrates inputs into the calculator, pushes the resulting byte to the OPC UA `ServiceLevel` variable via an edge-triggered `OnStateChanged` event, and fires `OnServerUriArrayChanged` when the topology's `ServerUriArray` shifts. |
|
||||
> **v2 change.** v1's operator-managed `ClusterNode.RedundancyRole` column + `RedundancyCoordinator` / `ApplyLeaseRegistry` / `PeerHttpProbeLoop` are gone. Primary/secondary is now derived from **Akka cluster role-leader** for the `driver` role. The operator no longer writes a role into the DB; cluster topology + health drive ServiceLevel automatically.
|
||||
|
||||
## Data model
|
||||
The runtime pieces live in:
|
||||
|
||||
Per-node redundancy state lives in the Config DB `ClusterNode` table (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/ClusterNode.cs`):
|
||||
|
||||
| Column | Role |
|
||||
|---|---|
|
||||
| `NodeId` | Unique node identity; matches `Node:NodeId` in the server's bootstrap `appsettings.json`. |
|
||||
| `ClusterId` | Foreign key into `ServerCluster`. |
|
||||
| `RedundancyRole` | `Primary`, `Secondary`, or `Standalone` (`RedundancyRole` enum in `Configuration/Enums`). |
|
||||
| `ServiceLevelBase` | Per-node base value used to bias nominal ServiceLevel output. |
|
||||
| `ApplicationUri` | Unique-per-node OPC UA ApplicationUri advertised in endpoint descriptions. |
|
||||
|
||||
`ServerUriArray` is derived from the set of peer `ApplicationUri` values at topology-load time and republished when the topology changes.
|
||||
|
||||
## ServiceLevel matrix
|
||||
|
||||
`ServiceLevelCalculator` produces one of the following bands (see `ServiceLevelBand` enum in the same file):
|
||||
|
||||
| Band | Byte | Meaning |
|
||||
| Component | Project | Role |
|
||||
|---|---|---|
|
||||
| `Maintenance` | 0 | Operator-declared maintenance. |
|
||||
| `NoData` | 1 | Self-reported unhealthy (`/healthz` fails). |
|
||||
| `InvalidTopology` | 2 | More than one Primary detected; both nodes self-demote. |
|
||||
| `RecoveringBackup` | 30 | Backup post-fault, dwell not met. |
|
||||
| `BackupMidApply` | 50 | Backup inside a publish-apply window. |
|
||||
| `IsolatedBackup` | 80 | Primary unreachable; Backup says "take over if asked" — does **not** auto-promote (non-transparent model). |
|
||||
| `AuthoritativeBackup` | 100 | Backup nominal. |
|
||||
| `RecoveringPrimary` | 180 | Primary post-fault, dwell not met. |
|
||||
| `PrimaryMidApply` | 200 | Primary inside a publish-apply window. |
|
||||
| `IsolatedPrimary` | 230 | Primary with unreachable peer, retains authority. |
|
||||
| `AuthoritativePrimary` | 255 | Primary nominal. |
|
||||
| `ServiceLevelCalculator` | `OtOpcUa.ControlPlane.Redundancy` | Pure function `(NodeHealthInputs) → byte`. No side effects. |
|
||||
| `RedundancyStateActor` | `OtOpcUa.ControlPlane.Redundancy` | Admin-role cluster singleton; subscribes to cluster topology events, debounces 250ms, broadcasts `RedundancyStateChanged` on the `redundancy-state` DPS topic. |
|
||||
| `DbHealthProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; runs `SELECT 1` against ConfigDb every 5s. Read by health endpoint + redundancy calc. |
|
||||
| `PeerOpcUaProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; pings peer `opc.tcp://peer:4840` (real probe call is staged for follow-up F12). |
|
||||
| `ClusterRoleInfo` | `OtOpcUa.Cluster` | Live view of cluster membership + role-leader; exposes `IClusterRoleInfo` to the rest of the host. |
|
||||
|
||||
The reserved bands (0 Maintenance, 1 NoData, 2 InvalidTopology) take precedence over operational states per OPC UA Part 5 §6.3.34. Operational values occupy 2..255 so spec-compliant clients that treat "<3 = unhealthy" keep working.
|
||||
## ServiceLevel tiers (Part 5 §6.5)
|
||||
|
||||
Standalone nodes (single-instance deployments) report `AuthoritativePrimary` when healthy and `PrimaryMidApply` during publish.
|
||||
`ServiceLevelCalculator.Compute(NodeHealthInputs)` returns a byte in 0..255 by tier:
|
||||
|
||||
## Publish fencing and split-brain prevention
|
||||
| Tier | Byte | Condition |
|
||||
|---|---|---|
|
||||
| Down | 0 | Member status is not `Up` or `Joining` (leaving, removed, exiting). |
|
||||
| Critically degraded | 100 | ConfigDb unreachable AND data is stale. |
|
||||
| Stale | 200 | Data stale but ConfigDb reachable. |
|
||||
| Healthy follower | 240 | DB ok + OPC UA probe ok + not stale. |
|
||||
| Healthy leader | 250 | Healthy + this node is the `driver` role-leader. |
|
||||
|
||||
Any Admin-triggered `sp_PublishGeneration` acquires an apply lease through `ApplyLeaseRegistry.BeginApplyLease`. While the lease is held:
|
||||
Drivers write their computed byte into the OPC UA `ServiceLevel` Variable on each refresh. Clients with the standard redundancy heuristic ("pick the highest ServiceLevel") therefore prefer the role-leader and fall back to followers on its degradation.
|
||||
|
||||
- The calculator reports `PrimaryMidApply` / `BackupMidApply` — clients see the band shift and cut over to the unaffected peer rather than racing against a half-applied generation.
|
||||
- `RedundancyCoordinator.RefreshAsync` is called at the end of the apply window so the post-publish topology becomes visible exactly once, atomically.
|
||||
- The watchdog force-closes any lease older than `ApplyMaxDuration`; a stuck publisher therefore cannot strand a node at `PrimaryMidApply`.
|
||||
## Data flow
|
||||
|
||||
Because role transitions are **operator-driven** (write `RedundancyRole` in the Config DB + publish), the Backup never auto-promotes. An `IsolatedBackup` at 80 is the signal that the operator should intervene; auto-failover is intentionally out of scope for the non-transparent model (decision #154).
|
||||
```
|
||||
Cluster topology event ──┐
|
||||
DB health probe ─────────┤
|
||||
OPC UA peer probe ───────┤
|
||||
▼
|
||||
RedundancyStateActor (admin singleton)
|
||||
│ debounce 250ms
|
||||
▼
|
||||
DPS topic "redundancy-state"
|
||||
│
|
||||
▼
|
||||
Driver nodes' OpcUaPublishActor
|
||||
│
|
||||
▼
|
||||
ServiceLevelCalculator → byte
|
||||
│
|
||||
▼
|
||||
OPC UA ServiceLevel Variable
|
||||
```
|
||||
|
||||
## Metrics
|
||||
The admin singleton is the cluster's only `RedundancyStateActor`. If the admin leader fails over, the new admin node spins up its replacement, re-subscribes to cluster events, and publishes a fresh snapshot from the current `Cluster.State`. There is no DB-persisted state to recover.
|
||||
|
||||
`RedundancyMetrics` in `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/RedundancyMetrics.cs` registers the `ZB.MOM.WW.OtOpcUa.Redundancy` meter on the Admin process. Instruments:
|
||||
## Configuration
|
||||
|
||||
| Name | Kind | Tags | Description |
|
||||
|---|---|---|---|
|
||||
| `otopcua.redundancy.role_transition` | Counter<long> | `cluster.id`, `node.id`, `from_role`, `to_role` | Incremented every time `FleetStatusPoller` observes a `RedundancyRole` change on a `ClusterNode` row. |
|
||||
| `otopcua.redundancy.primary_count` | ObservableGauge<long> | `cluster.id` | Primary-role nodes per cluster — should be exactly 1 in nominal state. |
|
||||
| `otopcua.redundancy.secondary_count` | ObservableGauge<long> | `cluster.id` | Secondary-role nodes per cluster. |
|
||||
| `otopcua.redundancy.stale_count` | ObservableGauge<long> | `cluster.id` | Nodes whose `LastSeenAt` exceeded the stale threshold. |
|
||||
Per-node identity comes from `appsettings.json` + the `OTOPCUA_ROLES` env var:
|
||||
|
||||
Admin `Program.cs` wires OpenTelemetry to the Prometheus exporter when `Metrics:Prometheus:Enabled=true` (default), exposing the meter under `/metrics`. The endpoint is intentionally unauthenticated — fleet conventions put it behind a reverse-proxy basic-auth gate if needed.
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Real-time notifications (Admin UI)
|
||||
```
|
||||
OTOPCUA_ROLES=admin,driver
|
||||
```
|
||||
|
||||
`FleetStatusPoller` in `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Hubs/` polls the `ClusterNode` table, records role transitions, updates `RedundancyMetrics.SetClusterCounts`, and pushes a `RoleChanged` SignalR event onto `FleetStatusHub` when a transition is observed. `RedundancyTab.razor` subscribes with `_hub.On<RoleChangedMessage>("RoleChanged", …)` so connected Admin sessions see role swaps the moment they happen.
|
||||
Both nodes share the same `ConfigDb` connection string; `Cluster.PublicHostname` + `Roles` are what makes them distinct in cluster gossip. The first node bootstraps the cluster (its address goes in `SeedNodes`); the second node joins via the same `SeedNodes` list.
|
||||
|
||||
## Configuring a redundant pair
|
||||
There is no longer a `Node:NodeId` setting, no `ClusterNode.RedundancyRole`, no `ServiceLevelBase`. NodeId is derived as `host:port` of the cluster `PublicHostname` (see `ClusterRoleInfo.LocalNode` for the formula).
|
||||
|
||||
Redundancy is configured **in the Config DB, not appsettings.json**. The fields that must differ between the two instances:
|
||||
### Peer URI advertising
|
||||
|
||||
| Field | Location | Instance 1 | Instance 2 |
|
||||
|---|---|---|---|
|
||||
| `NodeId` | `appsettings.json` `Node:NodeId` (bootstrap) | `node-a` | `node-b` |
|
||||
| `ClusterNode.ApplicationUri` | Config DB | `urn:node-a:OtOpcUa` | `urn:node-b:OtOpcUa` |
|
||||
| `ClusterNode.RedundancyRole` | Config DB | `Primary` | `Secondary` |
|
||||
| `ClusterNode.ServiceLevelBase` | Config DB | typically 255 | typically 100 |
|
||||
Each node advertises its partner via `OpcUaApplicationHostOptions.PeerApplicationUris` (an `IList<string>`, default empty). `OpcUaApplicationHost.PopulateServerArray` appends each configured peer URI to the SDK's `IServerInternal.ServerUris` string table after server startup, so that `Server.ServerArray` reads served by `OnReadServerArray` return both self + peers. Set this per-node in `appsettings.json`:
|
||||
|
||||
Shared between instances: `ClusterId`, Config DB connection string, published generation, cluster-level ACLs, UNS hierarchy, driver instances.
|
||||
```json
|
||||
{
|
||||
"OpcUaServer": {
|
||||
"PeerApplicationUris": ["urn:node-b:OtOpcUa"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Role swaps, stand-alone promotions, and base-level adjustments all happen through the Admin UI `RedundancyTab` — the operator edits the `ClusterNode` row in a draft generation and publishes. `RedundancyCoordinator.RefreshAsync` picks up the new topology without a process restart.
|
||||
Node A lists Node B's `ApplicationUri` and vice-versa. Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` — boots two `OpcUaApplicationHost` instances on loopback, asserts a real OPCFoundation client `Session` reading `Server.ServerArray` from Node A sees both URIs.
|
||||
|
||||
## Split-brain
|
||||
|
||||
`akka.conf` configures Akka's split-brain resolver with `active-strategy = keep-oldest`, `stable-after = 15s`, and `failure-detector.threshold = 10.0`. Under a clean partition: the oldest member stays up + the smaller (or younger) side downs itself within ~15 seconds. The `RedundancyStateActor` on the surviving partition re-computes from the post-partition `Cluster.State`.
|
||||
|
||||
There is no operator-driven role swap during a partition. Failover is what the cluster does automatically.
|
||||
|
||||
## Client-side failover
|
||||
|
||||
The OtOpcUa Client CLI at `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` supports `-F` / `--failover-urls` for automatic client-side failover; for long-running subscriptions the CLI monitors session KeepAlive and reconnects to the next available server, recreating the subscription on the new endpoint. See [`Client.CLI.md`](Client.CLI.md) for the command reference.
|
||||
The OtOpcUa Client CLI at `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` supports `-F` / `--failover-urls` for automatic client-side failover; for long-running subscriptions the CLI monitors session KeepAlive and reconnects to the next available server, recreating the subscription on the new endpoint. See [`Client.CLI.md`](Client.CLI.md).
|
||||
|
||||
## Depth reference
|
||||
|
||||
For the full decision trail and implementation plan — topology invariants, peer-probe cadence, recovery-dwell policy, compliance-script guard against enum-value drift — see `docs/v2/plan.md` §Phase 6.3.
|
||||
For the full design — message contracts, tiered calculator truth table, recovery semantics — see `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §6.
|
||||
|
||||
@@ -35,7 +35,7 @@ new ScriptedAlarmDefinition(
|
||||
|
||||
## Predicate evaluation
|
||||
|
||||
Alarm predicates reuse the same Roslyn sandbox as virtual tags — `ScriptEvaluator<AlarmPredicateContext, bool>` compiles the source, `TimedScriptEvaluator` wraps it with the configured timeout (default from `TimedScriptEvaluator.DefaultTimeout`), and `DependencyExtractor` statically harvests the tag paths the script reads. The sandbox rules (forbidden types, cancellation, logging sinks) are documented in [VirtualTags.md](VirtualTags.md); ScriptedAlarms does not redefine them. The known resource limits — unbounded script-side memory, the per-publish accretion of dynamically-emitted script assemblies (Core.Scripting-008), and the orphan-thread CPU-budget caveat — are documented in that file as well.
|
||||
Alarm predicates reuse the same Roslyn sandbox as virtual tags — `ScriptEvaluator<AlarmPredicateContext, bool>` compiles the source, `TimedScriptEvaluator` wraps it with the configured timeout (default from `TimedScriptEvaluator.DefaultTimeout`), and `DependencyExtractor` statically harvests the tag paths the script reads. The sandbox rules (forbidden types, cancellation, logging sinks) are documented in [VirtualTags.md](VirtualTags.md); ScriptedAlarms does not redefine them. The known resource limits — unbounded script-side memory and the orphan-thread CPU-budget caveat — are documented in that file as well; per-publish assembly accretion was resolved by the Core.Scripting-008 collectible-`AssemblyLoadContext` rewrite and no longer requires periodic server restarts.
|
||||
|
||||
`AlarmPredicateContext` (`AlarmPredicateContext.cs`) is the script's `ScriptContext` subclass:
|
||||
|
||||
@@ -111,13 +111,13 @@ Emissions map into `AlarmEventArgs` as `AlarmType = Kind.ToString()`, `SourceNod
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7EngineComposer.Compose` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs`) is the single call site that instantiates the engine. It takes the generation's `Script` / `VirtualTag` / `ScriptedAlarm` rows, the shared `CachedTagUpstreamSource`, an `IAlarmStateStore`, and an `IAlarmHistorianSink`, and returns a `Phase7ComposedSources` the caller owns. When `scriptedAlarms.Count > 0`:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) is the single call site that instantiates the engine. It takes the generation's `Script` / `VirtualTag` / `ScriptedAlarm` rows, the shared upstream-tag source, an `IAlarmStateStore`, and an `IAlarmHistorianSink`, and returns the composed sources the caller owns. When `scriptedAlarms.Count > 0`:
|
||||
|
||||
1. `ProjectScriptedAlarms` resolves each row's `PredicateScriptId` against the script dictionary and produces a `ScriptedAlarmDefinition` list. Unknown or disabled scripts throw immediately — the DB publish guarantees referential integrity but this is a belt-and-braces check.
|
||||
2. A `ScriptedAlarmEngine` is constructed with the upstream source, the store, a shared `ScriptLoggerFactory` keyed to `scripts-*.log`, and the root Serilog logger.
|
||||
3. `alarmEngine.OnEvent` is wired to `RouteToHistorianAsync`, which projects each emission into an `AlarmHistorianEvent` and enqueues it on the sink. Fire-and-forget — the SQLite store-and-forward sink is already non-blocking.
|
||||
4. `LoadAsync(alarmDefs)` runs synchronously on the startup thread: it compiles every predicate, subscribes to the union of predicate inputs and message-template tokens, seeds the value cache, loads persisted state, re-derives `ActiveState` from a fresh predicate evaluation, and starts the 5s shelving timer. Compile failures are aggregated into one `InvalidOperationException` so operators see every bad predicate in one startup log line rather than one at a time.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream, and a `ScriptedAlarmReadable` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/ScriptedAlarmReadable.cs`) is created for OPC UA variable reads on the alarm's active-state node (task #245) — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream; the v2 `ScriptedAlarmActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`) owns the active-state surface for OPC UA variable reads on the alarm's active-state node (task #245) — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
|
||||
Both engine and source are added to `Phase7ComposedSources.Disposables`, which `Phase7Composer` disposes on server shutdown.
|
||||
|
||||
@@ -132,5 +132,7 @@ Both engine and source are added to `Phase7ComposedSources.Disposables`, which `
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmTypes.cs` — `AlarmKind` + the four Part 9 enums
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/MessageTemplate.cs` — `{path}` placeholder resolver
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/IAlarmStateStore.cs` — persistence contract + `InMemoryAlarmStateStore` default
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs` — composition, config-row projection, historian routing
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/ScriptedAlarmReadable.cs` — `IReadable` adapter exposing `ActiveState` to OPC UA variable reads
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — composition, config-row projection, historian routing
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed Phase 7 plan into the SDK node manager
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs` — actor wrapper owning the alarm state machine and exposing `ActiveState` for OPC UA variable reads
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynScriptedAlarmEvaluator.cs` — production Roslyn predicate evaluator
|
||||
|
||||
+65
-41
@@ -1,62 +1,86 @@
|
||||
# Service Hosting
|
||||
# Service Hosting (v2)
|
||||
|
||||
## Overview
|
||||
|
||||
A production OtOpcUa deployment runs **two or three processes**, each
|
||||
with a distinct runtime and install surface:
|
||||
A production OtOpcUa deployment runs **one binary per node**, plus the optional Wonderware historian sidecar:
|
||||
|
||||
| Process | Project | Runtime | Platform | Responsibility |
|
||||
|---|---|---|---|---|
|
||||
| **OtOpcUa Server** | `src/Server/ZB.MOM.WW.OtOpcUa.Server` | .NET 10 | x64 | Hosts the OPC UA endpoint; loads every driver in-process (Modbus, S7, AbCip, AbLegacy, TwinCAT, FOCAS, OPC UA Client, Galaxy via mxaccessgw); exposes `/healthz`. |
|
||||
| **OtOpcUa Admin** | `src/Server/ZB.MOM.WW.OtOpcUa.Admin` | .NET 10 (ASP.NET Core / Blazor Server) | x64 | Operator UI for Config DB editing + fleet status, SignalR hubs (`FleetStatusHub`, `AlertHub`), Prometheus `/metrics`. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x86 (32-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true` in `appsettings.json`. |
|
||||
| **OtOpcUa Host** | `src/Server/ZB.MOM.WW.OtOpcUa.Host` | .NET 10 | AnyCPU | Single fused binary. `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + auth + control-plane singletons), `driver` (OPC UA endpoint + per-driver actors), or both. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x86 (32-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true`. |
|
||||
|
||||
Galaxy access uses a separately-installed **mxaccessgw** running out
|
||||
of a sibling repo (`c:\Users\dohertj2\Desktop\mxaccessgw\`) — see
|
||||
`docs/v2/Galaxy.ParityRig.md` for setup. The mxaccessgw owns the
|
||||
MXAccess COM bitness constraint (its worker is x86 net48); nothing
|
||||
in the OtOpcUa repo carries that constraint anymore. PR 7.2 retired
|
||||
the legacy in-process `Galaxy.Host` / `Galaxy.Proxy` / `Galaxy.Shared`
|
||||
projects + the `OtOpcUaGalaxyHost` Windows service.
|
||||
Galaxy access still uses the separately-installed **mxaccessgw** sidecar (see `docs/v2/Galaxy.ParityRig.md`); the gateway owns the MXAccess COM bitness constraint (its worker is x86 net48). Nothing in the OtOpcUa repo carries that constraint anymore.
|
||||
|
||||
## OtOpcUa Server
|
||||
> **v2 change.** v1's separate `OtOpcUa.Server` + `OtOpcUa.Admin` Windows services merged into a single role-gated `OtOpcUa.Host` binary. Two installers became one (with a `-Roles` parameter). The whole DI graph is composed in `OtOpcUa.Host/Program.cs`; per-role wiring is conditional on the env var.
|
||||
|
||||
Hosted via `Microsoft.Extensions.Hosting` with `AddWindowsService`
|
||||
(decision #30 — replaced TopShelf in v2). The host's `Build()`
|
||||
returns immediately when launched interactively (e.g. `dotnet run`)
|
||||
but blocks for SCM signals when running as a Windows service.
|
||||
## Role gating
|
||||
|
||||
In-process drivers are registered at startup in `Program.cs`'s
|
||||
`DriverFactoryRegistry` block; the `DriverInstance` rows in the
|
||||
central Config DB select which driver factories materialise into
|
||||
live `IDriver` instances. See `docs/v2/driver-specs.md` for the
|
||||
per-driver `DriverConfig` JSON shapes.
|
||||
`Program.cs` reads `OTOPCUA_ROLES`, parses it with `RoleParser`, and conditionally registers services:
|
||||
|
||||
## OtOpcUa Admin
|
||||
| Role present | Wires |
|
||||
|---|---|
|
||||
| `admin` | `AddOtOpcUaAuth`, `AddAdminUI`, `AddSignalR`, `AddOtOpcUaAdminClients`, `MapOtOpcUaAuth`, `MapAdminUI<App>`, `MapOtOpcUaHubs`, `WithOtOpcUaControlPlaneSingletons` (5 admin singletons via `Akka.Hosting`) |
|
||||
| `driver` | `WithOtOpcUaRuntimeActors` (DriverHostActor + DbHealthProbeActor) — and the OPC UA endpoint on port 4840 |
|
||||
| Either / both | `AddOtOpcUaConfigDb`, `AddOtOpcUaCluster`, `AddOtOpcUaHealth` (`/health/ready`, `/health/active`, `/healthz`) |
|
||||
|
||||
Same hosting model; runs the Blazor Server UI + SignalR hubs.
|
||||
Reads from the same Config DB the Server writes to.
|
||||
Single-node dev: `OTOPCUA_ROLES=admin,driver`. Production: typically two admin nodes (HA pair) + N driver nodes.
|
||||
|
||||
### Per-role configuration overlays
|
||||
|
||||
`Program.cs:33-35` builds a role suffix by joining the parsed roles **alphabetically** with `-` and loads `appsettings.{roleSuffix}.json` as an optional overlay on top of base `appsettings.json`. Three overlays ship in `src/Server/ZB.MOM.WW.OtOpcUa.Host/`:
|
||||
|
||||
- `appsettings.admin.json` — admin-only nodes
|
||||
- `appsettings.driver.json` — driver-only nodes
|
||||
- `appsettings.admin-driver.json` — fused single-node dev / small deployments
|
||||
|
||||
All three carry Serilog log-level overrides + `Security:Ldap:DevStubMode = false`. Loading order is **base `appsettings.json` → role overlay (`appsettings.{role}.json`) → environment overlay (`appsettings.{Environment}.json`)** — later layers win. Overlays are optional; the base file boots a node on its own.
|
||||
|
||||
## Akka cluster
|
||||
|
||||
The host joins an Akka.NET cluster bound to the address in `appsettings.json::Cluster`:
|
||||
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `WithOtOpcUaClusterBootstrap` (in `OtOpcUa.Cluster`) loads the embedded HOCON (split-brain resolver, pinned dispatcher, failure detector tuning) and overlays remote endpoint + cluster options.
|
||||
- All cluster singletons + per-node actors live on this single ActorSystem — there is no second Akka instance.
|
||||
|
||||
See [Redundancy.md](Redundancy.md) for the role-leader + ServiceLevel story.
|
||||
|
||||
## Health endpoints
|
||||
|
||||
Both admin and driver nodes expose:
|
||||
|
||||
| Path | Status meaning |
|
||||
|---|---|
|
||||
| `/healthz` | Process alive. |
|
||||
| `/health/ready` | ConfigDb reachable + cluster member state is `Up`. |
|
||||
| `/health/active` | Admin-role leader (the node Traefik or an HA LB should route traffic to). |
|
||||
|
||||
Used by Traefik for the active-leader-only routing pattern (see [Task 63 traefik docs](v2/Architecture-v2.md) — TODO).
|
||||
|
||||
## OtOpcUa Wonderware Historian (optional)
|
||||
|
||||
When `Historian:Wonderware:Enabled=true`, the Server speaks to a
|
||||
sidecar that wraps the Wonderware Historian SDK (which is .NET
|
||||
Framework only). The pipe IPC contract is in
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/Contracts/`
|
||||
and the sidecar's pipe handler lives at
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Pipe/`.
|
||||
|
||||
Install via the `-InstallWonderwareHistorian` switch on
|
||||
`scripts/install/Install-Services.ps1`.
|
||||
Unchanged from v1. Pipe IPC contract lives in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/Contracts/`; sidecar pipe handler in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Pipe/`. Install via `scripts/install/Install-Services.ps1 -InstallWonderwareHistorian`.
|
||||
|
||||
## Install / Uninstall
|
||||
|
||||
- `scripts/install/Install-Services.ps1` — installs `OtOpcUa` and
|
||||
optionally `OtOpcUaWonderwareHistorian`.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes both,
|
||||
plus `OtOpcUaGalaxyHost` if a pre-7.2 rig still carries it.
|
||||
- `scripts/install/Install-Services.ps1 -Roles admin,driver` — installs `OtOpcUaHost`. v2 rewrite tracked as plan Task 62.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes the host service (and the historian sidecar if installed).
|
||||
|
||||
## Logging
|
||||
|
||||
Serilog with rolling-daily file sinks. Each service writes to
|
||||
`%ProgramData%\OtOpcUa\<service>-*.log` plus stdout (NSSM-friendly).
|
||||
Serilog with rolling-daily file sinks. Each host writes to `logs/otopcua-*.log` plus stdout (NSSM/systemd-friendly). Per-environment log level overrides go in `appsettings.{Environment}.json`.
|
||||
|
||||
## Depth reference
|
||||
|
||||
For the full host-architecture rationale (why fused vs. split, role-gating tradeoffs, multi-node deployment shapes), see `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §3-4.
|
||||
|
||||
+13
-11
@@ -6,7 +6,7 @@ The runtime is split across two projects: `Core.Scripting` holds the Roslyn sand
|
||||
|
||||
## Roslyn script sandbox (`Core.Scripting`)
|
||||
|
||||
User scripts are compiled via `Microsoft.CodeAnalysis.CSharp.Scripting` against a `ScriptContext` subclass. `ScriptGlobals<TContext>` exposes the context as a field named `ctx`, so scripts read `ctx.GetTag("...")` / `ctx.SetVirtualTag("...", ...)` / `ctx.Now` / `ctx.Logger` and return a value.
|
||||
User scripts are compiled via `Microsoft.CodeAnalysis.CSharp` (regular compiler, not the scripting variant — the original `CSharpScript` pipeline was retired by the Core.Scripting-008 / -016 rewrite, see "Compile cache" below). Each script's source is pasted as the body of a synthesized `CompiledScript.Run(ScriptGlobals<TContext>)` method against a `ScriptContext` subclass. `ScriptGlobals<TContext>` exposes the context as a field named `ctx`, so scripts read `ctx.GetTag("...")` / `ctx.SetVirtualTag("...", ...)` / `ctx.Now` / `ctx.Logger` and return a value.
|
||||
|
||||
### Compile pipeline (`ScriptEvaluator<TContext, TResult>`)
|
||||
|
||||
@@ -30,11 +30,13 @@ Similarly, **`System.Threading.Tasks` is now denied** (Core.Scripting-003), whic
|
||||
|
||||
`ConcurrentDictionary<string, Lazy<ScriptEvaluator<...>>>` keyed on `SHA-256(UTF8(source))` rendered to hex. `Lazy<T>` with `ExecutionAndPublication` mode means two threads racing a miss compile exactly once. Failed compiles evict the entry (via the `TryRemove(KeyValuePair<,>)` overload so a concurrently re-added retry entry is not collateral damage — Core.Scripting-006) so a corrected retry can succeed (used during Admin UI authoring). No capacity bound — scripts are operator-authored and bounded by the config DB. Whitespace changes miss the cache on purpose. `Clear()` is called on config-publish.
|
||||
|
||||
**Per-publish assembly accretion (accepted limitation, Core.Scripting-008).** Each compiled `ScriptEvaluator` holds a Roslyn `ScriptRunner<T>` delegate, which keeps the dynamically-emitted script assembly loaded for the process lifetime. Emitted assemblies in the default `AssemblyLoadContext` cannot be unloaded; `CompiledScriptCache.Clear()` drops the dictionary entries but does **not** unload the underlying assemblies. Across many config-publish generations (each `Clear()` followed by recompiling every script), the process accumulates dead script assemblies. For the expected "low thousands" of scripts this is benign, but a long-running server with very frequent publishes will see steady managed-memory growth that does not return until the process restarts. Out-of-process script evaluation or a collectible `AssemblyLoadContext` is a v3 concern; deployments with high-publish-frequency requirements should schedule a periodic server restart to reclaim the accrued assemblies.
|
||||
**Per-publish assembly unload (Core.Scripting-008 resolved).** Each compiled `ScriptEvaluator` emits its script into a dedicated **collectible** `AssemblyLoadContext` — the BCL escape hatch for assemblies that can be unloaded. The compile path is hand-rolled `CSharpCompilation.Create` + `Emit(MemoryStream)` + `ScriptAssemblyLoadContext.LoadFromStream` rather than the legacy `CSharpScript.CreateDelegate` (which emits into the default ALC and cannot be unloaded). `ScriptEvaluator.Dispose()` calls `AssemblyLoadContext.Unload()` and `CompiledScriptCache.Clear()` disposes every materialised evaluator before dropping its dictionary entry, so the emitted assemblies become eligible for GC immediately after a config-publish. The reclaim is GC-timing-sensitive (Unload is *eligible-for-collection*, not synchronous); the next collection cycle reclaims them. Regression tests `Dispose_unloads_compiled_script_assembly_load_context` and `Clear_disposes_every_materialised_evaluator` in `CompiledScriptCacheTests` lock this contract via `WeakReference` + `GC.Collect()` assertions. Server restarts are no longer required to reclaim compiled-script memory.
|
||||
|
||||
**Scripting authoring convention.** With the collectible-ALC rewrite, the wrapper around a user script is an ordinary C# static method, not a Roslyn `Script` submission. The script body is pasted verbatim as the method body and must therefore end with an explicit `return …;` per ordinary C# rules — the legacy `CSharpScript` "last expression yields result" shorthand is gone. Every script in the existing test corpus already uses explicit `return`; this convention is operator-visible only when authoring a brand-new script from scratch.
|
||||
|
||||
### Per-evaluation timeout (`TimedScriptEvaluator<TContext, TResult>`)
|
||||
|
||||
Wraps `ScriptEvaluator` with a wall-clock budget. Default `DefaultTimeout = 250ms`. Implementation pushes the inner `RunAsync` onto `Task.Run` (so a CPU-bound script can't hog the calling thread before `WaitAsync` registers its timeout) then awaits `runTask.WaitAsync(Timeout, ct)`. A `TimeoutException` from `WaitAsync` is wrapped as `ScriptTimeoutException`. Caller-supplied `CancellationToken` cancellation wins over the timeout and propagates as `OperationCanceledException` — so a shutdown cancel is not misclassified. **Known leak:** when a CPU-bound script times out, the underlying `ScriptRunner` keeps running on its thread-pool thread until the Roslyn runtime returns (documented trade-off; out-of-process evaluation is a v3 concern).
|
||||
Wraps `ScriptEvaluator` with a wall-clock budget. Default `DefaultTimeout = 250ms`. Implementation pushes the inner `RunAsync` onto `Task.Run` (so a CPU-bound script can't hog the calling thread before `WaitAsync` registers its timeout) then awaits `runTask.WaitAsync(Timeout, ct)`. A `TimeoutException` from `WaitAsync` is wrapped as `ScriptTimeoutException`. Caller-supplied `CancellationToken` cancellation wins over the timeout and propagates as `OperationCanceledException` — so a shutdown cancel is not misclassified. **Known leak:** when a CPU-bound script times out, the underlying compiled-script delegate keeps running on its `Task.Run` thread-pool thread until it returns of its own accord (the CT is checked only at evaluator entry; once the script body is running, only the script returning or throwing will release the thread). The post-rewrite delegate is a regular C# `Func<>` bound to the synthesized `CompiledScript.Run` method, so this is a vanilla "synchronous CPU-bound work on a pool thread" leak rather than anything Roslyn-specific. Documented trade-off; out-of-process evaluation is a v3 concern.
|
||||
|
||||
### Script logger plumbing
|
||||
|
||||
@@ -105,13 +107,12 @@ Per [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) Option B,
|
||||
|
||||
`ITagUpstreamSource` and `IHistoryWriter` are the two ports the engine requires from its host. Both live in `Core.VirtualTags`. In the Server process:
|
||||
|
||||
- **`CachedTagUpstreamSource`** (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/CachedTagUpstreamSource.cs`) implements the interface (and the parallel `Core.ScriptedAlarms.ITagUpstreamSource` — identical shape, distinct namespace). A `ConcurrentDictionary<path, DataValueSnapshot>` cache. `Push(path, snapshot)` updates the cache and fans out synchronously to every observer. Reads of never-pushed paths return `BadNodeIdUnknown` quality (`UpstreamNotConfigured = 0x80340000`).
|
||||
- **`DriverSubscriptionBridge`** (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/DriverSubscriptionBridge.cs`) feeds the cache. For each registered `ISubscribable` driver it batches a single `SubscribeAsync` for every fullRef the script graph references, installs an `OnDataChange` handler that translates driver-opaque fullRefs back to UNS paths via a reverse map, and pushes each delta into `CachedTagUpstreamSource`. Unsubscribes on dispose. The bridge suppresses `OTOPCUA0001` (the Roslyn analyzer that requires `ISubscribable` callers to go through `CapabilityInvoker`) on the documented basis that this is a lifecycle wiring, not per-evaluation hot path.
|
||||
- **Upstream-tag feed.** In v2 the upstream-tag feed is provided by the actor system. `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) multiplexes driver `ISubscribable` subscriptions for every fullRef the script graph references, translating driver-opaque fullRefs back to UNS paths via a reverse map. Deltas land on `VirtualTagActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs`) as `DependencyValueChanged` messages; the actor's in-memory cache serves the engine's synchronous `GetTag` reads. Reads of never-pushed paths return `BadNodeIdUnknown` quality (`UpstreamNotConfigured = 0x80340000`).
|
||||
- **`IHistoryWriter`** — no production implementation is currently wired for virtual tags; `VirtualTagEngine` gets `NullHistoryWriter` by default from `Phase7EngineComposer`.
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs`) is an `IAsyncDisposable` injected into `OpcUaServerService`:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) projects the published generation into a `Phase7Plan` that `Phase7Applier` applies to the running SDK node manager:
|
||||
|
||||
1. `PrepareAsync(generationId, ct)` — called after the bootstrap generation loads and before `OpcUaApplicationHost.StartAsync`. Reads the `Script` / `VirtualTag` / `ScriptedAlarm` rows for that generation from the config DB (`OtOpcUaConfigDbContext`). Empty-config fast path returns `Phase7ComposedSources.Empty`.
|
||||
2. Constructs a `CachedTagUpstreamSource` + hands it to `Phase7EngineComposer.Compose`.
|
||||
@@ -143,8 +144,9 @@ Definition reload on config publish: `VirtualTagEngine.Load` is re-entrant — a
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/ITagUpstreamSource.cs` — driver-tag read + subscribe port
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/IHistoryWriter.cs` — historize sink port + `NullHistoryWriter`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagSource.cs` — `IReadable` + `ISubscribable` adapter
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/CachedTagUpstreamSource.cs` — production `ITagUpstreamSource`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/DriverSubscriptionBridge.cs` — driver `ISubscribable` → cache feed
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs` — row projection + engine instantiation
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs` — lifecycle host: load rows, compose, wire bridge
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — `SelectReadable` + `IsWriteAllowedBySource` dispatch kernel
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs` — actor wrapper that owns per-instance state and the synchronous read cache
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs` — driver `ISubscribable` → actor feed (replaces the v1 `DriverSubscriptionBridge`)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynVirtualTagEvaluator.cs` — production Roslyn evaluator wired into the actor
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — row projection + engine instantiation (`Phase7Plan` composer)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed plan into the SDK node manager
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — driver-vs-virtual dispatch kernel
|
||||
|
||||
@@ -136,9 +136,10 @@ ConditionType events (non-base `BaseEventType`) is not verified.
|
||||
## Follow-up candidates
|
||||
|
||||
The easiest win here is to **wire the client driver tests against this
|
||||
repo's own server**. The integration test project
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs`
|
||||
already stands up a real OPC UA server on a non-default port with a seeded
|
||||
repo's own server**. The v2 integration test project
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs`
|
||||
(the v2 replacement for the retired v1 `OpcUaServerIntegrationTests`) already
|
||||
stands up a real OPC UA server on a non-default port with a seeded
|
||||
FakeDriver. An `OpcUaClientLiveLoopbackTests` that connects the client
|
||||
driver to that server would give:
|
||||
|
||||
@@ -165,6 +166,6 @@ Beyond that:
|
||||
mocked `Session`
|
||||
- `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs` — ctor +
|
||||
session-factory seam tests mock through
|
||||
- `tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs` —
|
||||
the server-side integration harness a future loopback client test could
|
||||
piggyback on
|
||||
- `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs` —
|
||||
the v2 dual-endpoint integration harness a future loopback client test could
|
||||
piggyback on (v1 `OpcUaServerIntegrationTests.cs` retired with the v1 server project)
|
||||
|
||||
@@ -0,0 +1,451 @@
|
||||
# OtOpcUa v2 — Akka.NET + Fused Hosting Alignment with ScadaLink
|
||||
|
||||
**Status:** Design approved, ready for implementation planning
|
||||
**Date:** 2026-05-26
|
||||
**Branch:** `v2-akka-fuse`
|
||||
**Sister project reference:** `~/Desktop/scadalink-design` (ScadaLink)
|
||||
|
||||
## 1. Motivation
|
||||
|
||||
OtOpcUa today runs as three separate processes (`OtOpcUa.Server` OPC UA host, `OtOpcUa.Admin` Blazor Server web UI, optional `OtOpcUaWonderwareHistorian` Framework sidecar) with manual operator-driven warm-redundancy failover. The sister project ScadaLink — owned by the same developer — solved similar problems with a fused single-binary, role-gated hosting model on top of an Akka.NET cluster.
|
||||
|
||||
The motivation for this refactor is twofold:
|
||||
|
||||
1. **Consistency.** A single developer (the project owner) moves between OtOpcUa and ScadaLink frequently. Sharing patterns — hosting, auth, actor hierarchy, deployment model — reduces cognitive overhead and makes fixes portable.
|
||||
2. **Real HA improvements.** Upgrade OtOpcUa's manual operator-driven failover to automatic, Akka-cluster-driven failover with Traefik routing for the web UI. Preserve OPC UA dual-endpoint client-side failover semantics (clients connect to both nodes and pick based on `ServiceLevel`), now driven automatically by Akka cluster leadership.
|
||||
|
||||
## 2. Architecture overview
|
||||
|
||||
**One binary, role-gated.** `OtOpcUa.Host` (Microsoft.NET.Sdk.Web, .NET 10) replaces `OtOpcUa.Server` and `OtOpcUa.Admin`. Same binary on every node. Role configured via `OTOPCUA_ROLES` environment variable.
|
||||
|
||||
**Two Akka roles, single cluster:**
|
||||
|
||||
- **`admin`** — hosts Blazor web UI + cluster singletons. Singletons pinned via `ClusterSingletonManagerSettings.WithRole("admin")`. Traefik routes `/` to whichever Admin-role node `/health/active` reports as leader.
|
||||
- **`driver`** — hosts OPC UA endpoint + per-node `DriverHostActor` hierarchy. Every Driver-role node always serves OPC UA; `ServiceLevel` computed by `RedundancyStateActor` is broadcast back to each Driver node and used to publish to the local OPC UA address space.
|
||||
|
||||
Roles are additive: `OTOPCUA_ROLES=admin`, `OTOPCUA_ROLES=driver`, or `OTOPCUA_ROLES=admin,driver`. Small deployments run both roles on both nodes; larger deployments separate them.
|
||||
|
||||
**Per-role leadership.** `Cluster.Get(system).State.RoleLeader("driver")` drives OPC UA `ServiceLevel`. `RoleLeader("admin")` drives `/health/active` (Traefik routing). These are independent — admin and driver leadership can land on different nodes if separated.
|
||||
|
||||
**Cluster membership.** Both seed nodes; keep-oldest split-brain resolver; `down-if-alone = on`; 15s stable-after; 2s heartbeat / 10s threshold. CoordinatedShutdown for graceful singleton handover. Exact ScadaLink tuning.
|
||||
|
||||
**OPC UA dual-endpoint preserved.** Driver-role nodes all bind `opc.tcp://0.0.0.0:4840`. Clients still see N endpoints in `ServerUriArray` and fail over via `ServiceLevel`. OPC UA spec compliance unchanged from today.
|
||||
|
||||
**Mac dev:** role `admin,driver,dev` — `dev` short-circuits Windows-only driver registration (Galaxy, Wonderware) with explicit `[DEV-STUB]` log lines.
|
||||
|
||||
## 3. Project & process restructure
|
||||
|
||||
Single solution, ScadaLink-style folder layout. Existing OtOpcUa naming convention (`ZB.MOM.WW.OtOpcUa.*`) preserved.
|
||||
|
||||
### New entry point & deletions
|
||||
|
||||
| Action | Project |
|
||||
|---|---|
|
||||
| **New** | `OtOpcUa.Host` — `Microsoft.NET.Sdk.Web`, single Program.cs, role-gated startup, `AddWindowsService` |
|
||||
| **Delete** | `OtOpcUa.Server` (content migrates) |
|
||||
| **Delete** | `OtOpcUa.Admin` (UI moves to library) |
|
||||
|
||||
### New libraries
|
||||
|
||||
| Project | Owns | ScadaLink analog |
|
||||
|---|---|---|
|
||||
| `OtOpcUa.Commons` | Entity POCOs, interfaces, message contracts (`Types/`, `Interfaces/`, `Entities/`, `Messages/`) | `ScadaLink.Commons` |
|
||||
| `OtOpcUa.ConfigDb` | EF Core `DbContext`, repositories, `IAuditService`, migrations, Data Protection key store | `ScadaLink.ConfigurationDatabase` |
|
||||
| `OtOpcUa.Cluster` | Akka HOCON, `AkkaHostedService`, split-brain resolver config, role-aware membership helpers, `IClusterRoleInfo` | (split out of ScadaLink Host) |
|
||||
| `OtOpcUa.Security` | LDAP bind, cookie+JWT hybrid, JWT issuance, role mapping, `/auth/login`, `/auth/ping` endpoints | `ScadaLink.Security` |
|
||||
| `OtOpcUa.ControlPlane` | Cluster singletons: `ConfigPublishCoordinator`, `AdminOperationsActor`, `AuditWriterActor`, `FleetStatusBroadcaster`, `RedundancyStateActor` | `ScadaLink.ManagementService` |
|
||||
| `OtOpcUa.Runtime` | Per-node actors: `DriverHostActor`, `DriverInstanceActor`, `VirtualTagActor`, `ScriptedAlarmActor`, `OpcUaPublishActor`, `HistorianAdapterActor`, `PeerOpcUaProbeActor`, `DbHealthProbeActor` | `ScadaLink.SiteRuntime` |
|
||||
| `OtOpcUa.OpcUaServer` | OPC UA app host, address-space build, `Phase7Composer` extraction | (in ScadaLink.SiteRuntime/DCL) |
|
||||
| `OtOpcUa.AdminUI` | Blazor components, hubs (`FleetStatusHub`, `AlertHub`, `ScriptLogHub`), auth state provider, `MapAdminUI<TApp>()` | `ScadaLink.CentralUI` |
|
||||
|
||||
### Unchanged
|
||||
|
||||
- Driver projects (`OtOpcUa.Driver.Galaxy`, `.Modbus`, `.S7`, `.AbCip`, `.AbLegacy`, `.TwinCAT`, `.FOCAS`, `.OpcUaClient`) — still implement `IDriver`, now consumed by `DriverInstanceActor` instead of `DriverInstanceBootstrapper`.
|
||||
- `OtOpcUa.Driver.Historian.Wonderware` — .NET Framework 4.8 sidecar, named-pipe IPC, wrapped by a `HistorianAdapterActor` in `OtOpcUa.Runtime`.
|
||||
- `mxaccessgw` sibling repo — unchanged; Galaxy driver still talks gRPC to it.
|
||||
|
||||
### Tests
|
||||
|
||||
- `tests/OtOpcUa.Cluster.Tests` — split-brain, leadership transitions
|
||||
- `tests/OtOpcUa.ControlPlane.Tests` — singleton actor unit tests via Akka.TestKit
|
||||
- `tests/OtOpcUa.Runtime.Tests` — per-node actor tests, driver lifecycle
|
||||
- `tests/OtOpcUa.Security.Tests` — LDAP, cookie+JWT roundtrip
|
||||
- `tests/OtOpcUa.Host.IntegrationTests` — 2-node in-process cluster, deployment flow, failover, Mac-safe
|
||||
- `tests/OtOpcUa.OpcUa.IntegrationTests` — real OPCFoundation client against stubbed Host
|
||||
- `tests/OtOpcUa.E2E.Tests` — full stack with Traefik (nightly CI)
|
||||
|
||||
### Deploy
|
||||
|
||||
- `deploy/Install-Services.ps1` — installs one Windows Service per node (`OtOpcUaHost`), passes role via env var. Old script replaced.
|
||||
- `deploy/traefik/` — Windows Traefik config + service registration for the leader-routed `/health/active`.
|
||||
- `docker-dev/` (new, optional) — 2-node Mac dev compose with stubbed drivers + LDAP + SQL Server + Traefik.
|
||||
|
||||
Solution file: `OtOpcUa.slnx` (matches ScadaLink convention; switch from current `.sln`).
|
||||
|
||||
## 4. Actor hierarchy
|
||||
|
||||
### Per-node tree
|
||||
|
||||
Rooted under `OtOpcUa.Runtime`, one tree per Driver-role node:
|
||||
|
||||
```
|
||||
DriverHostActor (per-node coordinator, started by Host)
|
||||
├─ DriverInstanceActor (per DriverInstance row)
|
||||
│ └─ children = pooled or per-subscription work
|
||||
├─ VirtualTagActor (per VirtualTag row)
|
||||
├─ ScriptedAlarmActor (per ScriptedAlarm row)
|
||||
├─ OpcUaPublishActor (per-node bridge to OPCFoundation address space)
|
||||
├─ HistorianAdapterActor (per-node, wraps Wonderware named-pipe sidecar)
|
||||
├─ PeerOpcUaProbeActor (per-node, tests peer OPC UA stack health)
|
||||
└─ DbHealthProbeActor (per-node, cached DB health probe)
|
||||
```
|
||||
|
||||
### Cluster singletons
|
||||
|
||||
Pinned to `admin` role via `ClusterSingletonManagerSettings.WithRole("admin")`:
|
||||
|
||||
| Actor | Owns | Notes |
|
||||
|---|---|---|
|
||||
| `ConfigPublishCoordinator` | The deploy protocol. Writes `Deployment` row, broadcasts `DispatchDeployment(deploymentId)` via `DistributedPubSub` to every `DriverHostActor`, tracks apply ACKs per node. | Replaces `ApplyLeaseRegistry`. Resumes after failover by re-reading ConfigDb state — no Akka.Persistence. |
|
||||
| `AdminOperationsActor` | All mutating admin ops (CRUD on equipment, drivers, scripts, namespaces, ACLs). Wraps each in an audit envelope. | UI calls via `ClusterSingletonProxy` (in-process when UI is on Admin node). |
|
||||
| `AuditWriterActor` | Receives `AuditEvent` telemetry from any node, batch-inserts into `ConfigAuditLog`. | Idempotent on `EventId`. |
|
||||
| `FleetStatusBroadcaster` | Aggregates Akka cluster member events + per-node `DriverHostStatus` heartbeats. Publishes diffs to `IHubContext<FleetStatusHub>` and `IHubContext<AlertHub>`. | Push-driven; replaces today's 5s `FleetStatusPoller`. |
|
||||
| `RedundancyStateActor` | Subscribes to `ClusterEvent.IMemberEvent` + `ClusterEvent.LeaderChanged` + per-node health probes. Computes `ServiceLevel` byte + `ServerUriArray` per Driver node. Publishes to `DistributedPubSub` topic `redundancy-state`. | Source of truth for OPC UA redundancy. Local `OpcUaPublishActor` subscribes and writes to its OPCFoundation stack. |
|
||||
|
||||
### Supervision
|
||||
|
||||
| Actor | Strategy |
|
||||
|---|---|
|
||||
| `DriverHostActor` | `Resume` |
|
||||
| `DriverInstanceActor` | `Restart` with backoff (1s → 30s, ×1.5, jitter) |
|
||||
| `VirtualTagActor` | `Restart` with backoff |
|
||||
| `ScriptedAlarmActor` | `Restart` with backoff; preserve alarm state via `PreRestart` hook |
|
||||
| `OpcUaPublishActor` | `Resume` |
|
||||
| `HistorianAdapterActor` | `Restart` with backoff; SQLite store-and-forward buffers during pipe outage |
|
||||
| All singletons | `Resume`; resumable state in ConfigDb |
|
||||
| Script execution actors (short-lived) | `Stop` on failure |
|
||||
|
||||
### State machines
|
||||
|
||||
- `DriverInstanceActor` — Become/Stash for `Connecting → Connected → Reconnecting → Failed`. Bad-quality publish on disconnect; transparent re-subscribe on reconnect. Write failures returned synchronously via `Ask` from `OpcUaPublishActor`.
|
||||
- `ConfigPublishCoordinator` — `Idle → Publishing → AwaitingApplyAcks → Sealed`, with timeout-driven escalation if a node fails to ack within `ApplyMaxDuration` (default 10 min).
|
||||
- `RedundancyStateActor` — recomputes on every membership event, debounced 250ms to coalesce bursts.
|
||||
|
||||
### Communication conventions
|
||||
|
||||
- **Tell** for hot-path internal traffic (driver values, alarm state changes, publish broadcasts).
|
||||
- **Ask** only at system boundaries (UI controller → `AdminOperationsActor`, with explicit timeout + cancellation token).
|
||||
- **DistributedPubSub** for cluster-wide broadcasts (`DispatchDeployment`, `RedundancyStateChanged`, `FleetStatusChanged`).
|
||||
- Application-level **correlation IDs** on every request/response message.
|
||||
- Messages live in `OtOpcUa.Commons.Messages.{Drivers,Deploy,Admin,Audit,Redundancy}` — additive-only evolution.
|
||||
|
||||
### Singleton persistence
|
||||
|
||||
No Akka.Persistence. Each singleton reads its resumable state from `ConfigDb` on `PreStart` (e.g., `ConfigPublishCoordinator` reads the current in-flight `Deployment` row + per-node `NodeDeploymentState`) and writes on every state transition.
|
||||
|
||||
### Mac-dev stubs
|
||||
|
||||
`DevNode` role short-circuits driver registration. `DriverInstanceActor` for any Galaxy/Wonderware row enters a `Stubbed` Become state that returns deterministic test values. Logged at INFO with `[DEV-STUB] driver={Name} reason=windows-only`.
|
||||
|
||||
## 5. Web hosting, auth, and SignalR
|
||||
|
||||
### Kestrel startup gated by `admin` role
|
||||
|
||||
`Program.cs` builds `WebApplicationBuilder`, registers all services, but only calls `app.MapBlazor<App>()`, `app.MapHub<...>()`, `app.MapStaticAssets()`, and auth endpoints when `admin ∈ roles`. Driver-only nodes still bind Kestrel for `/healthz` on `:4841` and nothing else.
|
||||
|
||||
### Authentication — cookie+JWT hybrid
|
||||
|
||||
| Layer | Config |
|
||||
|---|---|
|
||||
| Cookie scheme | `OtOpcUa.Auth`, HttpOnly, SameSite=Strict, Secure (prod) / SameAsRequest (dev). Sliding 30-min idle timeout. |
|
||||
| Embedded JWT | HMAC-SHA256, 15-min expiry, claims = `sub`, `roles`, `nodeAcls`. |
|
||||
| LDAP bind | `LdapAuthService.AuthenticateAsync(user, pw)` at `/auth/login` POST — preserved from current `OtOpcUa.Admin/Security`. |
|
||||
| Role mapping | `RoleMapper.MapGroupsToRolesAsync()` — LDAP groups → `FleetAdmin` / `ConfigEditor` / `ReadOnly`. Stays as-is. |
|
||||
| Token issuance | `/auth/token` returns bearer for external clients (CLI, automation). |
|
||||
| Circuit expiry probe | `/auth/ping` returns 200/401, polled by `CookieAuthenticationStateProvider` to detect expiry from inside a SignalR circuit. |
|
||||
| Failure mode | LDAP unreachable → new logins fail, active sessions continue. |
|
||||
|
||||
### Data Protection keys
|
||||
|
||||
`services.AddDataProtection().PersistKeysToDbContext<OtOpcUaConfigDbContext>().SetApplicationName("OtOpcUa")` — keys live in `ConfigDb` so a circuit started on Admin-node A survives if Traefik fails over to Admin-node B mid-session.
|
||||
|
||||
### SignalR hubs
|
||||
|
||||
Three existing hubs preserved (`/hubs/fleet`, `/hubs/alerts`, `/hubs/script-log`):
|
||||
|
||||
- **Today:** `FleetStatusPoller` polls SQL every 5s.
|
||||
- **New:** `FleetStatusBroadcaster` singleton receives Akka cluster events + per-node telemetry, pushes diffs via `IHubContext<FleetStatusHub>`. No polling.
|
||||
- `HubTokenService` bearer-token fallback retired — hubs are circuit-local, cookie auth flows through SignalR natively. External hub consumers use the bearer token from `/auth/token` with a `JwtBearer` authentication scheme declaration on the hub.
|
||||
|
||||
### UI → backend wiring
|
||||
|
||||
- **Reads:** Blazor components inject scoped repositories from DI and read directly from `ConfigDb`. No change from today.
|
||||
- **Writes / mutating ops:** Components inject `IAdminOperationsClient` — a thin wrapper around `ClusterSingletonProxy` to `AdminOperationsActor`. Mutations are `Ask` with a 10s timeout + correlation ID. Audit envelope built UI-side, completed singleton-side.
|
||||
- **Driver diagnostics:** Today's `DriverDiagnosticsClient` HTTP round-trip retires. UI components ask `IFleetDiagnosticsClient` which delegates to `ClusterClientReceptionist`-published actor messages.
|
||||
|
||||
### Health endpoints
|
||||
|
||||
| Endpoint | Returns | Used by |
|
||||
|---|---|---|
|
||||
| `/health/ready` | 200 once Akka member is `Up` + ConfigDb reachable + DataProtection key ring loaded | Service supervisor readiness gate |
|
||||
| `/health/active` | 200 only on the Admin-role leader; 503 elsewhere | Traefik — routes browser traffic to leader |
|
||||
| `/healthz` (existing) | 200 when Driver-role actor system is up + at least one driver registered (preserved on `:4841`) | Ops probes, OPC UA monitoring tools |
|
||||
|
||||
### Traefik
|
||||
|
||||
Windows Service (or external box). One route: `host=otopcua.*` → load-balance to `{admin-node-a:9000, admin-node-b:9000}` with `/health/active` health check, sticky sessions disabled (DataProtection key sharing handles continuity).
|
||||
|
||||
### appsettings structure
|
||||
|
||||
Mirrors ScadaLink's per-component options pattern: `Cluster:`, `Security:`, `ConfigDb:`, `OpcUa:`, `Drivers:`, `Historian:` sections, bound to options classes owned by their respective component projects.
|
||||
|
||||
## 6. Edit + Deploy flow (replaces draft/publish generations)
|
||||
|
||||
The single most consequential domain change: **drop the draft/publish `ConfigGeneration` lifecycle**. Edits are live; deploy is a snapshot+push, ScadaLink-style.
|
||||
|
||||
### Edit model
|
||||
|
||||
- `Equipment`, `Driver`, `DriverInstance`, `Namespace`, `UnsItem`, `Script`, `VirtualTag`, `ScriptedAlarm`, `NodeAcl` are edited **directly** via `AdminOperationsActor`. No draft staging, no `ConfigGeneration` lifecycle. Last-write-wins per row (rowversion column for stale-write detection only).
|
||||
- Live edits do **not** affect running Driver-role nodes — running stacks reflect the *last-deployed* state. The UI shows a "drift" indicator when live ConfigDb state differs from last sealed deployment.
|
||||
- Validation runs on edit (semantic checks: driver tag-path validity, script syntax, namespace name uniqueness) — pulled forward from deploy-time to edit-time.
|
||||
|
||||
### Deploy model
|
||||
|
||||
```
|
||||
Admin UI "Deploy" → AdminOperationsActor.Ask(StartDeployment)
|
||||
AdminOperationsActor:
|
||||
→ snapshot ConfigDb current state
|
||||
→ ConfigComposer.Flatten() → DeploymentArtifact
|
||||
→ compute RevisionHash = SHA256(canonical-serialized artifact)
|
||||
→ write Deployment row (DeploymentId GUID, RevisionHash, CreatedBy, CreatedAtUtc, Status=Dispatching)
|
||||
→ Ask ConfigPublishCoordinator.DispatchDeployment(deploymentId)
|
||||
|
||||
ConfigPublishCoordinator (cluster singleton, admin role):
|
||||
→ write Deployment.Status = Dispatching
|
||||
→ DistributedPubSub Publish to "deployments" topic: DispatchDeployment(deploymentId, revisionHash)
|
||||
→ schedule ApplyDeadline timer (ApplyMaxDuration, default 10 min)
|
||||
|
||||
DriverHostActor (per node, subscribed to "deployments"):
|
||||
receive DispatchDeployment(deploymentId, revisionHash):
|
||||
→ if currentDeploymentRevision == revisionHash → ack Applied (idempotent)
|
||||
→ else:
|
||||
→ acquire per-node ApplyLock (Become Applying(deploymentId))
|
||||
→ write NodeDeploymentState row (NodeId, DeploymentId, StartedAtUtc)
|
||||
→ fetch artifact: read DeploymentArtifact blob from ConfigDb by deploymentId
|
||||
→ diff against current applied artifact → per-instance ApplyDelta plans
|
||||
→ dispatch ApplyDelta to DriverInstanceActor / VirtualTagActor / ScriptedAlarmActor children
|
||||
→ collect per-instance acks (all-or-nothing per node)
|
||||
→ on full success: write GenerationSealedCache (LiteDb local), update NodeDeploymentState.AppliedAtUtc
|
||||
→ on any instance Failure: rollback to previous deployment, mark NodeDeploymentState=Failed
|
||||
→ Tell Coordinator: ApplyAck(deploymentId, nodeId, Applied | Failed(reason))
|
||||
→ Become Steady
|
||||
|
||||
ConfigPublishCoordinator: collect ApplyAcks
|
||||
→ all Driver nodes Applied → Deployment.Status = Sealed → DistributedPubSub PublishDeploymentSealed
|
||||
→ any Failed → Deployment.Status = PartiallyFailed → broadcast DeploymentFailed
|
||||
→ deadline elapsed before all acks → Deployment.Status = TimedOut → broadcast DeploymentTimedOut
|
||||
```
|
||||
|
||||
### Per-instance operation lock
|
||||
|
||||
All mutating commands (deploy, disable, enable, delete) on a `DriverInstance` go through `DriverInstanceActor`, which serializes them via the actor mailbox — single-threaded by construction.
|
||||
|
||||
### Idempotency
|
||||
|
||||
- `DeploymentId` + `RevisionHash` together identify a deployment.
|
||||
- `DriverHostActor` seeing a `DispatchDeployment` whose `RevisionHash` matches current applied state → immediate ack `Applied`, no work. Safe to redeliver.
|
||||
- `Phase7Composer.ComposeAsync(artifact)` is pure; same artifact → same delta plan.
|
||||
- `DriverInstanceActor.ApplyDelta(plan)` compares against current state, applies only diffs.
|
||||
|
||||
### Concurrency control
|
||||
|
||||
- Last-write-wins on edits (no optimistic concurrency on `Equipment`, `Driver`, `Script`, etc.) — matches ScadaLink template behavior.
|
||||
- **Optimistic concurrency on `Deployment` and `NodeDeploymentState` rows** (rowversion column) — prevents two concurrent Coordinator instances (during failover) from corrupting state.
|
||||
|
||||
### Singleton failover during deploy
|
||||
|
||||
1. Old Coordinator wrote `Deployment.Status = Dispatching` + `NodeDeploymentState` rows before broadcast.
|
||||
2. New Coordinator on takeover queries `Deployment` rows with non-terminal `Status`.
|
||||
3. For each in-flight deployment, `Ask` every `DriverHostActor` (via cluster-aware actor selection) for current `NodeDeploymentState`.
|
||||
4. Recompute outstanding-ack set; resume the deadline timer with the remaining time.
|
||||
5. If apply deadline already passed → mark `Deployment.Status = TimedOut` for any unack'd nodes.
|
||||
|
||||
### Crash recovery on Driver node restart
|
||||
|
||||
- `DriverHostActor.PreStart` reads `NodeDeploymentState` for self.
|
||||
- If row says `Applied` for some `DeploymentId` and matches last sealed cache → Become Steady on that artifact.
|
||||
- If row says `Applying` (didn't reach Applied) → discard partial state, re-fetch the artifact, replay apply (idempotent).
|
||||
- If ConfigDb unreachable → fall back to local LiteDb sealed cache, Become `Stale` (drops ServiceLevel via `RedundancyStateActor`). Background reconnect retries every 30s.
|
||||
|
||||
### Schema migration from today
|
||||
|
||||
| Today | New |
|
||||
|---|---|
|
||||
| `ConfigGeneration` (Draft/Published/Sealed lifecycle) | **Dropped** |
|
||||
| `ClusterNodeGenerationState` | Renamed → `NodeDeploymentState` with `(NodeId, DeploymentId, Status, StartedAtUtc, AppliedAtUtc, RowVersion)` |
|
||||
| `ClusterNode.RedundancyRole` column | **Dropped** (Akka leader-of-driver-role is source of truth) |
|
||||
| `ConfigAuditLog` | Kept; deploy events added as new event types |
|
||||
| (new) `Deployment` | `(DeploymentId, RevisionHash, Status, CreatedBy, CreatedAtUtc, ArtifactBlob varbinary(max), RowVersion)` |
|
||||
| (new) `ConfigEdit` audit row per Equipment/Driver/Script edit | Live-edit history |
|
||||
| (new) `DataProtectionKeys` | DataProtection key ring storage |
|
||||
|
||||
No more `ApplyLeaseRegistry` table or watchdog actor. Apply state lives in `NodeDeploymentState`; watchdog is a Coordinator-side scheduled message keyed by `DeploymentId`.
|
||||
|
||||
### Stale-config fallback
|
||||
|
||||
Preserved from today's `GenerationSealedCache`: local LiteDb cache holds last-applied `DeploymentArtifact`. On Host boot with ConfigDb unreachable, `DriverHostActor` boots from cache → Become `Stale` → `RedundancyStateActor` drops `ServiceLevel` for that node.
|
||||
|
||||
### Peer probes consolidated
|
||||
|
||||
| Today | New |
|
||||
|---|---|
|
||||
| `PeerHttpProbeLoop` (HTTP `/healthz`) | Retired — Akka failure detector replaces it |
|
||||
| `PeerUaProbeLoop` (OPC UA `opc.tcp://peer:4840`) | **Retained** as `PeerOpcUaProbeActor` — tests whether the OPC UA stack itself (not just the process) is up. Feeds `RedundancyStateActor`. |
|
||||
| `DbHealthCache` (cached DB probe) | Retained as `DbHealthProbeActor` per-node. Feeds `RedundancyStateActor` + `/health/ready`. |
|
||||
|
||||
### ServiceLevel computation in `RedundancyStateActor`
|
||||
|
||||
```
|
||||
serviceLevel(node) =
|
||||
base 240 if (cluster member Up AND db reachable AND not stale AND opc ua probe ok)
|
||||
base 200 if (member Up AND db reachable AND stale)
|
||||
base 100 if (member Up AND db unreachable AND stale)
|
||||
base 0 if (member Down / Unreachable)
|
||||
|
||||
+10 bonus if Akka driver-role leader is this node
|
||||
```
|
||||
|
||||
ServiceLevel bands match the existing `RedundancyStatePublisher` so OPC UA client behavior is unchanged from today. The leader-bonus replaces today's operator-managed `RedundancyRole = Primary`.
|
||||
|
||||
## 7. Error handling & failure modes
|
||||
|
||||
### Akka cluster failure modes
|
||||
|
||||
| Scenario | Behavior |
|
||||
|---|---|
|
||||
| Network partition (split-brain) | Keep-oldest resolver downs the smaller side after 15s stable-after. `down-if-alone = on` covers isolated nodes. |
|
||||
| Admin leader process crash | Failure detector trips after 10s, downs the member, new singleton instance starts on remaining Admin node. Traefik `/health/active` probe fails over within 1 polling interval (~5s). |
|
||||
| Driver-role node crash | RedundancyStateActor sees member Down → drops that node's ServiceLevel to 0 → OPC UA clients reconnect to surviving node. Both nodes were already running their own copy; no in-cluster recovery needed for that node's work. |
|
||||
| Both Admin nodes down simultaneously | Web UI unavailable. Driver nodes continue serving OPC UA from last-sealed cache. No new deployments possible until Admin node recovers. |
|
||||
| All Driver nodes down | OPC UA endpoints unavailable. Clients reconnect when any Driver node returns. ServiceLevel back to 240 once member Up + DB reachable + apply sealed. |
|
||||
| Singleton handover during deploy | Coordinator state survives in `Deployment` + `NodeDeploymentState` ConfigDb rows. New Coordinator queries DriverHostActors via cluster-aware actor selection. Resume remaining deadline. |
|
||||
|
||||
### ConfigDb unavailability
|
||||
|
||||
- **At edit time:** AdminUI returns user-visible error. No retries — operator decides.
|
||||
- **At deploy time:** Coordinator refuses to start dispatch if it can't write the `Deployment` row.
|
||||
- **At Driver node boot:** Fall back to local LiteDb sealed cache. RedundancyStateActor drops `ServiceLevel`.
|
||||
- **At singleton failover:** New Coordinator's `PreStart` retries via Polly (5 attempts, exponential backoff). If exhausted → singleton crashes → cluster restarts singleton on next viable Admin node.
|
||||
|
||||
### Driver / equipment failures
|
||||
|
||||
- Driver connection loss → `DriverInstanceActor` enters `Reconnecting` Become state, publishes bad-quality to OPC UA address space immediately, retries at fixed interval.
|
||||
- Tag-path-resolution failure → retried periodically.
|
||||
- Write failure to driver → returned synchronously to caller via `Ask` from `OpcUaPublishActor`.
|
||||
- Driver process unresponsive (Galaxy gateway down) → `IDriver.HealthCheck` returns degraded → `DriverInstanceActor` reports to `DriverHostActor` → `RedundancyStateActor` factors into ServiceLevel.
|
||||
|
||||
### Wonderware historian sidecar
|
||||
|
||||
- Named-pipe disconnect → `HistorianAdapterActor` enters `Reconnecting`; alarm history rows buffered to local SQLite store-and-forward.
|
||||
- Sidecar process crash → no in-cluster recovery (external process); operator restarts via Windows Service control.
|
||||
|
||||
### Auth failures
|
||||
|
||||
- LDAP unreachable → `/auth/login` returns 503. Active sessions continue with cached claims.
|
||||
- JWT signature failure (key ring drift) → 401, session terminates. DataProtection keys in ConfigDb prevent this in the happy path.
|
||||
- Cookie expired (sliding 30-min idle) → `/auth/ping` returns 401 → `CookieAuthenticationStateProvider` triggers UI logout.
|
||||
|
||||
### SignalR / circuit drops
|
||||
|
||||
- Blazor circuit dropped → `App.razor` reload script reconnects (preserved from today).
|
||||
- Hub message loss during reconnect → `FleetStatusBroadcaster` resends current state to the reconnecting client on `OnConnectedAsync` (full snapshot, not just diffs).
|
||||
|
||||
### OPC UA stack failures
|
||||
|
||||
- Address-space corruption → `OpcUaPublishActor` logs ERROR, sends `RebuildAddressSpace` to itself; sequence number bump notifies clients to resubscribe.
|
||||
- OPC UA listener bind failure (port collision) → Host fails readiness probe, supervisor restarts service.
|
||||
|
||||
### Audit invariants
|
||||
|
||||
- Audit write failures **never abort** the user-facing action. `AuditWriterActor` buffer overflow → log WARN, drop oldest (with counter metric). The action's success/failure path is authoritative.
|
||||
- All deploy + edit events carry `ExecutionId` (per-request correlation) so audit rows for one operator action share an ID.
|
||||
|
||||
## 8. Testing strategy
|
||||
|
||||
Test projects mirror the new layering. Test infrastructure stays Mac-friendly: stubbed Windows-only drivers, ephemeral SQL Server (LocalDB on Windows / `mcr.microsoft.com/mssql/server` container on Mac), `OpenLDAP` container, all spun up via `tests/docker-compose.yml`.
|
||||
|
||||
### Layered test pyramid
|
||||
|
||||
| Layer | Project | What it covers |
|
||||
|---|---|---|
|
||||
| **Unit** | `OtOpcUa.Runtime.Tests` | Per-actor logic via `Akka.TestKit.Xunit2`. `DriverInstanceActor` state-machine transitions, `Phase7Composer` purity, `ScriptedAlarmActor` state machine, `VirtualTagActor` expression eval. Drivers mocked via `IDriver` test doubles. |
|
||||
| **Unit** | `OtOpcUa.ControlPlane.Tests` | Singleton actor logic. `ConfigPublishCoordinator` happy path + timeout + concurrent ack ordering. `RedundancyStateActor` ServiceLevel computation truth table. `AuditWriterActor` batch flush + idempotency on duplicate `EventId`. |
|
||||
| **Unit** | `OtOpcUa.Cluster.Tests` | Split-brain resolver config validation, role-aware membership helpers, HOCON parses. |
|
||||
| **Unit** | `OtOpcUa.Security.Tests` | LDAP role mapping, JWT issuance, cookie+JWT roundtrip, `/auth/ping` expiry semantics. |
|
||||
| **Integration** | `OtOpcUa.Host.IntegrationTests` | 2-node in-process Akka cluster. Real SQL Server, stubbed drivers. Tests: deploy happy path, deploy timeout, deploy with one node down, singleton failover mid-deploy, ConfigDb outage + stale-config fallback, edit-then-deploy roundtrip, audit row emission. |
|
||||
| **Integration** | `OtOpcUa.OpcUa.IntegrationTests` | Real OPCFoundation client connects to a running stubbed Host. Asserts: dual endpoint visible, ServerUriArray populated, ServiceLevel reflects leader status, browse + read + write through `OpcUaPublishActor`, write failures returned synchronously. |
|
||||
| **End-to-end** | `OtOpcUa.E2E.Tests` | Full Host with Traefik in front, two Admin nodes + two Driver nodes (4 processes via Docker). Verifies: web UI login via LDAP, deploy from UI flows to OPC UA stack, kill admin leader → Traefik fails over within 25s, kill driver node → OPC UA clients reconnect with correct ServiceLevel. CI nightly. |
|
||||
|
||||
### Failover-specific test cases
|
||||
|
||||
1. Kill Admin leader during `Dispatching` phase → new Coordinator resumes, deployment seals.
|
||||
2. Kill Admin leader during `AwaitingApplyAcks` → new Coordinator queries DriverHostActors, completes ack collection.
|
||||
3. Kill Driver node during `Applying` → Coordinator marks that node's `NodeDeploymentState=Failed` after deadline; surviving Driver nodes complete their apply.
|
||||
4. Restart Driver node mid-deploy → on restart, replays apply (idempotent).
|
||||
5. Akka split-brain (network partition between 2 admin nodes) → keep-oldest wins, smaller side downs itself within 15s.
|
||||
6. Both Admin nodes restart simultaneously → deployments in `Dispatching` resume cleanly after cluster reforms.
|
||||
7. Concurrent edits to the same `DriverInstance` from two UI sessions → last write wins, both audit rows present, no row corruption.
|
||||
|
||||
### Deploy idempotency tests
|
||||
|
||||
- Replay `DispatchDeployment` with same `DeploymentId/RevisionHash` → no work, ack `Applied`.
|
||||
- Apply same `DeploymentArtifact` twice in a row → second application is a no-op.
|
||||
- Crash DriverHostActor mid-apply, restart → resumes from `NodeDeploymentState`, completes idempotently.
|
||||
|
||||
### Property tests
|
||||
|
||||
- `Phase7Composer.ComposeAsync` is pure: same artifact → same plan, no side effects.
|
||||
- `RedundancyStateActor` ServiceLevel computation: every combination of (member-state, db-ok, stale, opc-ok, is-leader) produces expected byte.
|
||||
- Audit envelope generation: every mutating op produces exactly one audit row with stable `ExecutionId` correlation.
|
||||
|
||||
### Mac-dev test invariants
|
||||
|
||||
- All unit + integration tests run on macOS without Windows-only assemblies.
|
||||
- Cluster tests use in-process Akka.Remote on 127.0.0.1.
|
||||
- LDAP tests use `OpenLDAP` container or `Security:Ldap:DevStubMode=true`.
|
||||
|
||||
### Retired tests
|
||||
|
||||
Anything touching `ConfigGeneration` lifecycle, `ApplyLeaseRegistry`, `PeerHttpProbeLoop`, `FleetStatusPoller`, `RedundancyCoordinator` peer-probe loops, `RedundancyStatePublisher`.
|
||||
|
||||
## 9. Risks & open questions
|
||||
|
||||
1. **Akka.NET on .NET 10.** Verify Akka.NET 1.5+ targets .NET 10 cleanly.
|
||||
2. **OPCFoundation SDK threading.** The OPC UA stack runs its own threadpool. `OpcUaPublishActor` must marshal writes via thread-safe wrappers; use a dedicated `synchronized-dispatcher` for actors that touch the OPC UA address space.
|
||||
3. **Failure detector tuning.** ScadaLink's 2s/10s is tuned for site-to-central RTT. Benchmark before locking. Aggressive tuning + GC pauses → spurious singleton handover.
|
||||
4. **ServiceLevel = Akka leader removes operator control.** No escape hatch in v1. If a customer needs one later, add a `PinnedPrimary` column to `ClusterNode` and an override path in `RedundancyStateActor`. Out of scope now.
|
||||
5. **Long-lived v2 branch drift.** Monthly rebase from main, CI runs on v2 from day one.
|
||||
6. **Schema migration is destructive.** Dropping `ConfigGeneration` + `ClusterNode.RedundancyRole` is one-way. Cutover must run on a quiesced system. Provide a `Migrate-To-V2.ps1` script that backs up ConfigDb, runs EF migrations, validates row counts, prints a summary.
|
||||
7. **Wonderware + mxaccessgw still external processes.** Both untouched by this refactor. Future actorization would be a second refactor.
|
||||
8. **Audit row volume.** Edit-heavy install ≈ 5k rows/day. Need monthly partition + 365-day retention same as ScadaLink #23.
|
||||
|
||||
## 10. Migration plan
|
||||
|
||||
Big-bang on `v2-akka-fuse` branch:
|
||||
|
||||
1. Branch `v2-akka-fuse` off `main`.
|
||||
2. Add new projects: `OtOpcUa.Host`, `.Cluster`, `.Security`, `.ControlPlane`, `.Runtime`, `.ConfigDb`, `.Commons`, `.AdminUI`, `.OpcUaServer`. Convert to `OtOpcUa.slnx`.
|
||||
3. Move ConfigDb access (EF context, repos, migrations) out of `Server` and `Admin` into `OtOpcUa.ConfigDb`. Add DataProtection key store table.
|
||||
4. Move LDAP + cookie + JWT out of `Admin/Security` into `OtOpcUa.Security`. Adopt 15-min JWT / 30-min sliding cookie / `/auth/ping`.
|
||||
5. Build `OtOpcUa.Cluster`: HOCON, `AkkaHostedService`, role-aware membership helpers, split-brain resolver.
|
||||
6. Build `OtOpcUa.ControlPlane`: `ConfigPublishCoordinator`, `AdminOperationsActor`, `AuditWriterActor`, `FleetStatusBroadcaster`, `RedundancyStateActor`.
|
||||
7. Build `OtOpcUa.Runtime`: `DriverHostActor`, `DriverInstanceActor`, `VirtualTagActor`, `ScriptedAlarmActor`, `OpcUaPublishActor`, `HistorianAdapterActor`, `PeerOpcUaProbeActor`, `DbHealthProbeActor`.
|
||||
8. Migrate `Phase7Composer` to `OtOpcUa.OpcUaServer`; make it pure and unit-tested.
|
||||
9. Move Blazor components from `Admin` into `OtOpcUa.AdminUI` library; replace `DriverDiagnosticsClient` HTTP with in-process actor calls; rewire `FleetStatusHub` / `AlertHub` / `ScriptLogHub` to be fed by `FleetStatusBroadcaster` `IHubContext`.
|
||||
10. Build `OtOpcUa.Host` `Program.cs`: role-gated startup, health endpoints (`/health/ready`, `/health/active`, `/healthz`), `AddWindowsService`.
|
||||
11. ConfigDb migration: add `Deployment`, `ConfigEdit`, `DataProtectionKeys` tables; rename `ClusterNodeGenerationState` → `NodeDeploymentState`; drop `ConfigGeneration`; drop `ClusterNode.RedundancyRole`. EF migration + idempotent SQL script + `Migrate-To-V2.ps1`.
|
||||
12. Delete `OtOpcUa.Server`, `OtOpcUa.Admin`, `DriverInstanceBootstrapper`, `RedundancyCoordinator`, `RedundancyStatePublisher`, `ApplyLeaseRegistry`, `FleetStatusPoller`, `PeerHttpProbeLoop`, `HubTokenService`. Sweep any `*RedundancyRole*` references.
|
||||
13. Update `deploy/Install-Services.ps1`: single Windows Service per node, role via env var, Traefik service registration.
|
||||
14. Update docs in `docs/`: rewrite `Redundancy.md`, `ServiceHosting.md`; add `Cluster.md`, `ControlPlane.md`, `Runtime.md`. Add top-level `Architecture-v2.md` summary.
|
||||
15. CI: add integration test job for the 2-node cluster + OPC UA roundtrip.
|
||||
16. Tag the last v1 release on `main` for backport-only fixes. Merge `v2-akka-fuse` → `main` when GA.
|
||||
@@ -0,0 +1,716 @@
|
||||
# Akka Hosting Alignment — Gap Closeout Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use `superpowers-extended-cc:executing-plans` to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Close the four real/cosmetic gaps identified by the audit of `docs/plans/2026-05-26-akka-hosting-alignment-plan.md` so the v2 implementation matches the plan's literal contract (per-role appsettings overlays, explicit dual-endpoint visibility test, plan-prescribed filenames, removal of empty legacy directories).
|
||||
|
||||
**Architecture:** Additive only. No production-runtime semantics change. One small extension to `OpcUaApplicationHost` so the OPC UA server can advertise peer URIs in `Server.ServerArray` — gated on a new option, defaults to old behavior. Everything else is JSON, test code, file moves, and `rm -rf` of stale bin/obj trees.
|
||||
|
||||
**Tech Stack:** .NET 10, OPCFoundation .NET Standard SDK (`Opc.Ua.*`), xunit.v3, Shouldly, EF Core 10 (inherited; no schema changes).
|
||||
|
||||
**Source plan:** `docs/plans/2026-05-26-akka-hosting-alignment-plan.md`. The audit findings closed by this plan map to Tasks 54, 59, 60, and the post-Task-56 cosmetic cleanup. **Read the source plan's "Conventions for every task" block — those rules still apply here.**
|
||||
|
||||
**Branch:** `v2-gap-closeout` off `master`.
|
||||
|
||||
---
|
||||
|
||||
## Conventions for every task
|
||||
|
||||
- **Branch:** Stay on `v2-gap-closeout`. Never commit to `master` while plan is running.
|
||||
- **Build command:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — must be green before commit.
|
||||
- **Test command:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build` — relevant new/changed tests must pass.
|
||||
- **Commit format:** Conventional Commits matching the source plan — `feat(host):`, `test(opcua):`, `chore(cleanup):`, `refactor(test):`, etc.
|
||||
- **Mac compatibility:** All code must build on macOS. The new dual-endpoint test boots two real OPC UA servers on loopback — works on macOS (no Windows-only APIs needed; PKI is created under a per-test temp dir).
|
||||
|
||||
---
|
||||
|
||||
## Task 0: Add three role-overlay appsettings files (Task 54 gap)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1, Task 5, Task 6
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin.json`
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.driver.json`
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin-driver.json`
|
||||
|
||||
**Background:**
|
||||
`Program.cs` line 33-35 loads `appsettings.{role-suffix}.json` where the suffix is the roles joined alphabetically with `'-'`. Today the loader passes `optional: true`, so the host boots without these files — but the source plan (Task 54) called them out as required scaffolding so operators have per-role tunable defaults.
|
||||
|
||||
Suffix matrix:
|
||||
| `OTOPCUA_ROLES` env | Loaded file |
|
||||
|---|---|
|
||||
| `admin` | `appsettings.admin.json` |
|
||||
| `driver` | `appsettings.driver.json` |
|
||||
| `admin,driver` (any order) | `appsettings.admin-driver.json` (joined alphabetical) |
|
||||
|
||||
**Step 1: Create `appsettings.admin.json`**
|
||||
|
||||
Admin-only nodes don't bind drivers; tighten Serilog and disable the LDAP dev stub by default.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Create `appsettings.driver.json`**
|
||||
|
||||
Driver-only nodes have no Admin UI; raise OPC UA verbosity slightly so per-node diagnostics flow to logs.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Opc.Ua": "Debug",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 3: Create `appsettings.admin-driver.json`**
|
||||
|
||||
Combined-role nodes (the docker-dev compose default + the integration test harness) — turn on both surfaces with shared defaults.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Opc.Ua": "Information",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: succeeds. (JSON files do not break the build; this is a smoke check that nothing else regressed.)
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin.json \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.driver.json \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin-driver.json
|
||||
git commit -m "feat(host): add per-role appsettings overlays for admin/driver/admin-driver"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Extend `OpcUaApplicationHost` with `PeerApplicationUris` + populate `Server.ServerArray`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 0, Task 5, Task 6
|
||||
|
||||
**Files:**
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` (add option + post-start population)
|
||||
- Test: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs`
|
||||
|
||||
**Background:**
|
||||
The source plan's Task 60 promised a test where "real OPCFoundation client → both endpoints visible in ServerUriArray". That requires production code to populate the peer URIs onto each server's `Server.ServerArray` (NodeId i=2254) property. No such code exists in v2 today — this task adds it as an opt-in option so existing single-node tests keep their current behavior. Task 3 then writes the integration test that drives it across two servers.
|
||||
|
||||
**Step 1: Write the failing unit test**
|
||||
|
||||
Create `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs`:
|
||||
|
||||
```csharp
|
||||
using System.IO;
|
||||
using System.Net.Sockets;
|
||||
using System.Net;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Server;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Audit gap closeout — verifies <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/>
|
||||
/// is reflected in <c>Server.ServerArray</c> after start. Single-server in-process check; the
|
||||
/// cross-server visibility check lives in <c>OtOpcUa.OpcUaServer.IntegrationTests</c>.
|
||||
/// </summary>
|
||||
public sealed class OpcUaApplicationHostServerArrayTests
|
||||
{
|
||||
[Fact]
|
||||
public async Task ServerArray_contains_local_uri_and_configured_peers_after_start()
|
||||
{
|
||||
var pkiRoot = Path.Combine(Path.GetTempPath(), $"otopcua-pki-{Guid.NewGuid():N}");
|
||||
try
|
||||
{
|
||||
var options = new OpcUaApplicationHostOptions
|
||||
{
|
||||
ApplicationName = "OtOpcUa.UnitTest",
|
||||
ApplicationUri = "urn:OtOpcUa.UnitTest.NodeA",
|
||||
OpcUaPort = AllocateFreePort(),
|
||||
PublicHostname = "127.0.0.1",
|
||||
PkiStoreRoot = pkiRoot,
|
||||
PeerApplicationUris = new[] { "urn:OtOpcUa.UnitTest.NodeB" },
|
||||
};
|
||||
|
||||
var server = new StandardServer();
|
||||
await using var host = new OpcUaApplicationHost(options, NullLogger<OpcUaApplicationHost>.Instance);
|
||||
await host.StartAsync(server, CancellationToken.None);
|
||||
|
||||
var serverArray = server.CurrentInstance.ServerObject.ServerArray.Value;
|
||||
serverArray.ShouldNotBeNull();
|
||||
serverArray.ShouldContain("urn:OtOpcUa.UnitTest.NodeA");
|
||||
serverArray.ShouldContain("urn:OtOpcUa.UnitTest.NodeB");
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (Directory.Exists(pkiRoot)) Directory.Delete(pkiRoot, recursive: true);
|
||||
}
|
||||
}
|
||||
|
||||
private static int AllocateFreePort()
|
||||
{
|
||||
var listener = new TcpListener(IPAddress.Loopback, 0);
|
||||
listener.Start();
|
||||
var port = ((IPEndPoint)listener.LocalEndpoint).Port;
|
||||
listener.Stop();
|
||||
return port;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the test — confirm it fails**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests --filter "FullyQualifiedName~OpcUaApplicationHostServerArrayTests"`
|
||||
Expected: FAIL with `PeerApplicationUris` not found (compile error) — the option doesn't exist yet.
|
||||
|
||||
**Step 3: Add the option**
|
||||
|
||||
Edit `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs`. Add to `OpcUaApplicationHostOptions` (after `AutoAcceptUntrustedClientCertificates`, around line 65):
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Peer server URIs published in <c>Server.ServerArray</c> after start, in addition to
|
||||
/// the local <see cref="ApplicationUri"/>. Empty by default — set this on warm-redundancy
|
||||
/// deployments so OPC UA clients can discover the partner endpoint via the standard
|
||||
/// Server.ServerArray property (NodeId i=2254). Order does not matter; the local URI
|
||||
/// is always element 0.
|
||||
/// </summary>
|
||||
public IList<string> PeerApplicationUris { get; set; } = new List<string>();
|
||||
```
|
||||
|
||||
**Step 4: Populate `Server.ServerArray` after start**
|
||||
|
||||
Edit `OpcUaApplicationHost.StartAsync` (around line 100-118). After the `_application.Start(server)` call and before the log line, insert:
|
||||
|
||||
```csharp
|
||||
PopulateServerArray();
|
||||
```
|
||||
|
||||
Then add the private method below `AttachUserAuthenticator`:
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Writes the union of <see cref="OpcUaApplicationHostOptions.ApplicationUri"/> and
|
||||
/// <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/> to the OPC UA standard
|
||||
/// <c>Server.ServerArray</c> property (NodeId i=2254). Clients in a warm-redundancy
|
||||
/// deployment discover the partner endpoint by reading this property.
|
||||
/// </summary>
|
||||
private void PopulateServerArray()
|
||||
{
|
||||
var serverObject = _server?.CurrentInstance?.ServerObject;
|
||||
if (serverObject is null) return;
|
||||
|
||||
var uris = new List<string> { _options.ApplicationUri };
|
||||
foreach (var peer in _options.PeerApplicationUris)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(peer) && !uris.Contains(peer))
|
||||
uris.Add(peer);
|
||||
}
|
||||
serverObject.ServerArray.Value = uris.ToArray();
|
||||
}
|
||||
```
|
||||
|
||||
**Step 5: Run the test — confirm it passes**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests --filter "FullyQualifiedName~OpcUaApplicationHostServerArrayTests"`
|
||||
Expected: PASS. If `ServerObject.ServerArray.Value` is read-only (some SDK versions guard it), fall back to writing through `ServerArrayNode.Value` via the address-space accessor — but try the direct write first; the SDK exposes it as a settable BaseDataVariableState on `ServerObjectState`.
|
||||
|
||||
**Step 6: Run full OpcUaServer.Tests suite to confirm no regression**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests`
|
||||
Expected: all tests pass — `PopulateServerArray` is additive when `PeerApplicationUris` is empty (default), so existing tests don't change behavior.
|
||||
|
||||
**Step 7: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs
|
||||
git commit -m "feat(opcua): OpcUaApplicationHost publishes peer URIs in Server.ServerArray"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Create `OtOpcUa.OpcUaServer.IntegrationTests` project
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 5, Task 6 (file moves elsewhere)
|
||||
**Depends on:** none (csproj is self-contained)
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj`
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/ZB.MOM.WW.OtOpcUa.slnx` (add the project)
|
||||
|
||||
**Background:**
|
||||
The source plan's Task 60 named this exact project. Audit found ServiceLevel coverage relocated to other test projects but no `OpcUaServer.IntegrationTests` project exists. Creating the project skeleton in its own task keeps Task 3's commit focused on the test code.
|
||||
|
||||
**Step 1: Create the csproj**
|
||||
|
||||
Mirror the conventions in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests.csproj`. The integration project needs the `Opc.Ua.Client` package (vs. only `Opc.Ua.Server` in the unit tests) — confirm the version against the existing client CLI's csproj: `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI/ZB.MOM.WW.OtOpcUa.Client.CLI.csproj`.
|
||||
|
||||
```xml
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<IsPackable>false</IsPackable>
|
||||
<IsTestProject>true</IsTestProject>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests</RootNamespace>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="xunit.v3"/>
|
||||
<PackageReference Include="Shouldly"/>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Client"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Configuration"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions"/>
|
||||
<PackageReference Include="xunit.runner.visualstudio">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\src\Server\ZB.MOM.WW.OtOpcUa.OpcUaServer\ZB.MOM.WW.OtOpcUa.OpcUaServer.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
```
|
||||
|
||||
If `OPCFoundation.NetStandard.Opc.Ua.Client` isn't in `Directory.Packages.props`, add it there (mirror the existing `OPCFoundation.NetStandard.Opc.Ua.Server` version exactly).
|
||||
|
||||
**Step 2: Add project to the solution**
|
||||
|
||||
Run: `dotnet sln ZB.MOM.WW.OtOpcUa.slnx add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj`
|
||||
Expected: "Project added to the solution."
|
||||
|
||||
**Step 3: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: builds. (Empty project, so no test discovery yet — `dotnet test` would say "no tests".)
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ \
|
||||
ZB.MOM.WW.OtOpcUa.slnx \
|
||||
Directory.Packages.props # only if the Opc.Ua.Client version was added there
|
||||
git commit -m "test(opcua): scaffold OtOpcUa.OpcUaServer.IntegrationTests project"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: `DualEndpointTests` — real OPC UA client reads both URIs from `Server.ServerArray`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 5, Task 6
|
||||
**Depends on:** Task 1 (PeerApplicationUris wiring), Task 2 (IT project exists)
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs`
|
||||
|
||||
**Background:**
|
||||
This is the explicit Task 60 deliverable: a real OPC UA client connects to one server and confirms it can discover the partner via `Server.ServerArray`. Single-server unit-side coverage exists in Task 1; this test exercises the wire path with both servers up.
|
||||
|
||||
**Step 1: Write the test**
|
||||
|
||||
```csharp
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Client;
|
||||
using Opc.Ua.Configuration;
|
||||
using Opc.Ua.Server;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests;
|
||||
|
||||
/// <summary>
|
||||
/// Source plan Task 60 — closes the audit gap. Boots two real <see cref="StandardServer"/>
|
||||
/// instances on loopback, each configured with the other's <c>ApplicationUri</c> in
|
||||
/// <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/>. A real OPC UA client connects
|
||||
/// to Node A, reads <c>Server.ServerArray</c>, and asserts both URIs are visible — the
|
||||
/// warm-redundancy discovery contract clients depend on.
|
||||
/// </summary>
|
||||
public sealed class DualEndpointTests
|
||||
{
|
||||
private const string NodeAUri = "urn:OtOpcUa.DualEndpoint.NodeA";
|
||||
private const string NodeBUri = "urn:OtOpcUa.DualEndpoint.NodeB";
|
||||
|
||||
[Fact]
|
||||
public async Task Client_reads_both_ApplicationUris_from_NodeA_ServerArray()
|
||||
{
|
||||
var pkiRootA = Path.Combine(Path.GetTempPath(), $"otopcua-pki-a-{Guid.NewGuid():N}");
|
||||
var pkiRootB = Path.Combine(Path.GetTempPath(), $"otopcua-pki-b-{Guid.NewGuid():N}");
|
||||
var portA = AllocateFreePort();
|
||||
var portB = AllocateFreePort();
|
||||
|
||||
try
|
||||
{
|
||||
await using var nodeA = await StartNodeAsync(NodeAUri, portA, pkiRootA, peers: new[] { NodeBUri });
|
||||
await using var nodeB = await StartNodeAsync(NodeBUri, portB, pkiRootB, peers: new[] { NodeAUri });
|
||||
|
||||
var serverArray = await ReadServerArrayAsync($"opc.tcp://127.0.0.1:{portA}/OtOpcUa");
|
||||
serverArray.ShouldContain(NodeAUri);
|
||||
serverArray.ShouldContain(NodeBUri);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (Directory.Exists(pkiRootA)) Directory.Delete(pkiRootA, recursive: true);
|
||||
if (Directory.Exists(pkiRootB)) Directory.Delete(pkiRootB, recursive: true);
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<OpcUaApplicationHost> StartNodeAsync(
|
||||
string applicationUri, int port, string pkiRoot, string[] peers)
|
||||
{
|
||||
var options = new OpcUaApplicationHostOptions
|
||||
{
|
||||
ApplicationName = applicationUri, // unique per node — SDK uses it for cert CN
|
||||
ApplicationUri = applicationUri,
|
||||
OpcUaPort = port,
|
||||
PublicHostname = "127.0.0.1",
|
||||
PkiStoreRoot = pkiRoot,
|
||||
EnabledSecurityProfiles = new List<OpcUaSecurityProfile> { OpcUaSecurityProfile.None },
|
||||
AutoAcceptUntrustedClientCertificates = true,
|
||||
PeerApplicationUris = peers,
|
||||
};
|
||||
var server = new StandardServer();
|
||||
var host = new OpcUaApplicationHost(options, NullLogger<OpcUaApplicationHost>.Instance);
|
||||
await host.StartAsync(server, CancellationToken.None);
|
||||
return host;
|
||||
}
|
||||
|
||||
private static async Task<string[]> ReadServerArrayAsync(string endpointUrl)
|
||||
{
|
||||
var appConfig = new ApplicationConfiguration
|
||||
{
|
||||
ApplicationName = "OtOpcUa.DualEndpointClient",
|
||||
ApplicationUri = $"urn:OtOpcUa.DualEndpointClient.{Guid.NewGuid():N}",
|
||||
ApplicationType = ApplicationType.Client,
|
||||
SecurityConfiguration = new SecurityConfiguration
|
||||
{
|
||||
ApplicationCertificate = new CertificateIdentifier(),
|
||||
AutoAcceptUntrustedCertificates = true,
|
||||
},
|
||||
ClientConfiguration = new ClientConfiguration { DefaultSessionTimeout = 60_000 },
|
||||
CertificateValidator = new CertificateValidator(),
|
||||
};
|
||||
await appConfig.Validate(ApplicationType.Client);
|
||||
appConfig.CertificateValidator.CertificateValidation += (_, e) => e.Accept = true;
|
||||
|
||||
var endpoint = CoreClientUtils.SelectEndpoint(appConfig, endpointUrl, useSecurity: false);
|
||||
var endpointConfiguration = EndpointConfiguration.Create(appConfig);
|
||||
var configuredEndpoint = new ConfiguredEndpoint(null, endpoint, endpointConfiguration);
|
||||
|
||||
using var session = await Session.Create(
|
||||
appConfig, configuredEndpoint, updateBeforeConnect: false,
|
||||
sessionName: "DualEndpointTests", sessionTimeout: 60_000,
|
||||
identity: new UserIdentity(new AnonymousIdentityToken()),
|
||||
preferredLocales: null);
|
||||
|
||||
var value = session.ReadValue(VariableIds.Server_ServerArray);
|
||||
return (string[])value.Value;
|
||||
}
|
||||
|
||||
private static int AllocateFreePort()
|
||||
{
|
||||
var listener = new TcpListener(IPAddress.Loopback, 0);
|
||||
listener.Start();
|
||||
var port = ((IPEndPoint)listener.LocalEndpoint).Port;
|
||||
listener.Stop();
|
||||
return port;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the test**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests`
|
||||
Expected: PASS. Wall-time ~3-5 s (two cert-creation cycles + session handshake).
|
||||
|
||||
If the test hangs on the session handshake on first run, it's the SDK reading the trusted-cert store — bumping `AutoAcceptUntrustedClientCertificates = true` on both server hosts (already set above) should resolve it. If `CoreClientUtils.SelectEndpoint` throws because the SDK version uses a different overload, fall back to constructing the `EndpointDescription` directly with `EndpointUrl = endpointUrl, SecurityMode = MessageSecurityMode.None, SecurityPolicyUri = SecurityPolicies.None` and skipping `SelectEndpoint`.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs
|
||||
git commit -m "test(opcua): DualEndpointTests — real client reads peer URIs from Server.ServerArray"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Wire `OtOpcUa.OpcUaServer.IntegrationTests` into v2-ci.yml
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 5, Task 6
|
||||
**Depends on:** Task 3 (project must exist + have a real test before CI runs it)
|
||||
|
||||
**Files:**
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/.github/workflows/v2-ci.yml`
|
||||
|
||||
**Step 1: Add the project to the `integration` job**
|
||||
|
||||
Either extend the existing `integration` job to run a second `dotnet test` step, or convert it to a matrix. Prefer a matrix for symmetry with `unit-tests`:
|
||||
|
||||
Open `.github/workflows/v2-ci.yml`, locate the `integration:` job. Replace it with:
|
||||
|
||||
```yaml
|
||||
integration:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
```
|
||||
|
||||
**Step 2: Build green check**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests --configuration Release --filter "Category!=E2E"`
|
||||
Expected: matches the exact CI command — passes locally so CI will pass too.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add .github/workflows/v2-ci.yml
|
||||
git commit -m "ci(v2): include OpcUaServer.IntegrationTests in integration matrix"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Rename `FailoverScenarioTests` → `FailoverDuringDeployTests` (Task 59 cosmetic)
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** Task 0, Task 1, Task 2, Task 6 (different files)
|
||||
|
||||
**Files:**
|
||||
- Rename: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverScenarioTests.cs` → `FailoverDuringDeployTests.cs`
|
||||
- Modify: class name + namespace-internal references
|
||||
|
||||
**Step 1: Rename the file and the class**
|
||||
|
||||
```bash
|
||||
git mv tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverScenarioTests.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverDuringDeployTests.cs
|
||||
```
|
||||
|
||||
Then edit `FailoverDuringDeployTests.cs` and replace the single class declaration `public sealed class FailoverScenarioTests` with `public sealed class FailoverDuringDeployTests`. Use Edit, not sed — the file only declares this class once (`grep -c "FailoverScenario" .` ≤ 2).
|
||||
|
||||
**Step 2: Sweep for any stale references**
|
||||
|
||||
Run: `grep -rln "FailoverScenarioTests" .`
|
||||
Expected: zero matches after Step 1. If anything appears (e.g., a CI filter, a doc), fix the reference.
|
||||
|
||||
**Step 3: Build + run test**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests --filter "FullyQualifiedName~FailoverDuringDeployTests"`
|
||||
Expected: same tests pass that previously passed under the old name.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverDuringDeployTests.cs
|
||||
git commit -m "refactor(test): rename FailoverScenarioTests → FailoverDuringDeployTests for plan parity"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Delete empty bin/obj-only legacy directories
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** Task 0, Task 1, Task 2, Task 5
|
||||
|
||||
**Files:**
|
||||
- Delete: `src/Server/ZB.MOM.WW.OtOpcUa.Server/`
|
||||
- Delete: `src/Server/ZB.MOM.WW.OtOpcUa.Admin/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/`
|
||||
|
||||
**Background:**
|
||||
Source plan Task 56 deleted the projects from `ZB.MOM.WW.OtOpcUa.slnx` (confirmed by the audit) but left `bin/`+`obj/` shells on disk. These confuse new contributors and skew directory listings. None of them are referenced anywhere.
|
||||
|
||||
**Step 1: Sanity-check that each directory is bin/obj-only**
|
||||
|
||||
```bash
|
||||
for dir in \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Server \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Admin \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests; do
|
||||
echo "--- $dir ---"
|
||||
find "$dir" -maxdepth 2 -type f | grep -v "/bin/\|/obj/"
|
||||
done
|
||||
```
|
||||
|
||||
Expected: every section is empty (no source files leak out). If any source file shows, STOP and surface it — don't delete blindly.
|
||||
|
||||
**Step 2: Verify slnx doesn't reference them**
|
||||
|
||||
Run: `grep -nE 'ZB\.MOM\.WW\.OtOpcUa\.(Server|Admin)(/|\.Tests|\.E2ETests)' ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: zero matches.
|
||||
|
||||
**Step 3: Delete the directories**
|
||||
|
||||
```bash
|
||||
rm -rf src/Server/ZB.MOM.WW.OtOpcUa.Server \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Admin \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests
|
||||
```
|
||||
|
||||
**Step 4: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: succeeds (these directories were already out of the solution).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add -A
|
||||
git commit -m "chore(cleanup): remove stale bin/obj shells for deleted v1 Server/Admin projects"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Final build + test green check
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (verification, depends on all prior tasks)
|
||||
|
||||
**Step 1: Restore + build**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: 0 errors, 0 warnings (TreatWarningsAsErrors is on across the solution).
|
||||
|
||||
**Step 2: Run the full test suite**
|
||||
|
||||
Run: `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build`
|
||||
Expected: all tests green. Specifically confirm:
|
||||
- `OpcUaApplicationHostServerArrayTests` (Task 1) — pass
|
||||
- `DualEndpointTests` (Task 3) — pass
|
||||
- `FailoverDuringDeployTests` (Task 5) — same count of tests pass as before the rename
|
||||
|
||||
**Step 3: Smoke check the audit assertions**
|
||||
|
||||
Run:
|
||||
```bash
|
||||
ls src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.*.json
|
||||
find tests/Server -iname "DualEndpointTests.cs" -o -iname "FailoverDuringDeployTests.cs"
|
||||
ls -la src/Server/ZB.MOM.WW.OtOpcUa.{Server,Admin} 2>/dev/null
|
||||
```
|
||||
|
||||
Expected:
|
||||
- 4 appsettings files: `.json`, `.Development.json`, `.admin.json`, `.admin-driver.json`, `.driver.json`
|
||||
- Both renamed/new test files exist
|
||||
- The two `ls -la` calls return errors (directories gone)
|
||||
|
||||
**Step 4: No commit unless cleanup turned up**
|
||||
|
||||
If anything failed in Steps 1-3, fix it as a follow-up task — do not paper over with a `--no-verify` commit.
|
||||
|
||||
---
|
||||
|
||||
## Final verification
|
||||
|
||||
After Task 7:
|
||||
|
||||
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — green
|
||||
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build` — green (incl. 2 new tests)
|
||||
3. `git log --oneline master..HEAD` — exactly 6 commits, Conventional-Commits style
|
||||
4. Open PR `v2-gap-closeout` → `master` titled "v2: close audit gaps — appsettings overlays, DualEndpointTests, cleanup"
|
||||
|
||||
---
|
||||
|
||||
## Task index
|
||||
|
||||
| # | Title | Class | Time | Parallel with |
|
||||
|---|---|---|---|---|
|
||||
| 0 | Per-role appsettings overlays | small | 3m | 1, 5, 6 |
|
||||
| 1 | OpcUaApplicationHost.PeerApplicationUris + ServerArray | standard | 5m | 0, 5, 6 |
|
||||
| 2 | OpcUaServer.IntegrationTests project skeleton | small | 4m | 5, 6 |
|
||||
| 3 | DualEndpointTests | standard | 5m | 5, 6 |
|
||||
| 4 | CI matrix entry for new IT project | small | 3m | 5, 6 |
|
||||
| 5 | Rename FailoverScenarioTests → FailoverDuringDeployTests | trivial | 2m | 0, 1, 2, 6 |
|
||||
| 6 | Delete stale bin/obj-only directories | trivial | 2m | 0, 1, 2, 5 |
|
||||
| 7 | Final build + test green check | trivial | 3m | none |
|
||||
|
||||
**Total estimated subagent time:** ~27 min.
|
||||
|
||||
**Dependency graph (non-parallel pairs):**
|
||||
- Task 3 depends on Task 1 (option must exist) and Task 2 (project must exist)
|
||||
- Task 4 depends on Task 3 (CI runs the project's tests)
|
||||
- Task 7 depends on all prior tasks
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-26-akka-hosting-alignment-gaps-closeout.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 0: Per-role appsettings overlays", "status": "completed", "commit": "898a477"},
|
||||
{"id": 2, "subject": "Task 1: OpcUaApplicationHost.PeerApplicationUris + ServerArray population", "status": "completed", "commits": ["70ffd28", "cb936db"]},
|
||||
{"id": 3, "subject": "Task 2: OpcUaServer.IntegrationTests project skeleton", "status": "completed", "commit": "83eda9e"},
|
||||
{"id": 4, "subject": "Task 3: DualEndpointTests — real OPC UA client reads both URIs from Server.ServerArray", "status": "completed", "commits": ["dce2528", "a5412c1", "cb936db"], "blockedBy": ["2", "3"]},
|
||||
{"id": 5, "subject": "Task 4: Wire OpcUaServer.IntegrationTests into v2-ci.yml", "status": "completed", "commit": "e8c4f18", "blockedBy": ["4"]},
|
||||
{"id": 6, "subject": "Task 5: Rename FailoverScenarioTests → FailoverDuringDeployTests", "status": "completed", "commit": "25ce111"},
|
||||
{"id": 7, "subject": "Task 6: Delete empty bin/obj-only legacy directories", "status": "completed", "commit": "(no tracked changes — bin/obj only)"},
|
||||
{"id": 8, "subject": "Task 7: Final build + test green check", "status": "completed", "blockedBy": ["1", "2", "3", "4", "5", "6", "7"]}
|
||||
],
|
||||
"lastUpdated": "2026-05-26T00:00:00Z",
|
||||
"finalReview": "approved",
|
||||
"branchHead": "e8c4f18",
|
||||
"branchCommitCount": 8
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,101 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-26-akka-hosting-alignment-plan.md",
|
||||
"branch": "v2-akka-fuse",
|
||||
"designDoc": "docs/plans/2026-05-26-akka-hosting-alignment-design.md",
|
||||
"lastUpdated": "2026-05-26T00:00:00Z",
|
||||
"tasks": [
|
||||
{"id": 0, "subject": "Task 0: Create branch and central package management", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [], "commit": "2b81147"},
|
||||
{"id": 1, "subject": "Task 1: Create OtOpcUa.Commons project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [2,3,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 2, "subject": "Task 2: Create OtOpcUa.Cluster project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,3,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 3, "subject": "Task 3: Create OtOpcUa.Security project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 4, "subject": "Task 4: Create OtOpcUa.ControlPlane project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 5, "subject": "Task 5: Create OtOpcUa.Runtime project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 6, "subject": "Task 6: Create OtOpcUa.OpcUaServer project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,5,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 7, "subject": "Task 7: Create OtOpcUa.AdminUI Razor class library", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,5,6,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 8, "subject": "Task 8: Create OtOpcUa.Host Web SDK project", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [1,2,3,4,5,6,7], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 9, "subject": "Task 9: Build green smoke check", "status": "completed", "classification": "trivial", "estMinutes": 2, "parallelizableWith": [], "blockedBy": [1,2,3,4,5,6,7,8], "commit": "30a2104"},
|
||||
{"id": 10, "subject": "Task 10: Add Deployment entity", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [11,12,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 11, "subject": "Task 11: Add NodeDeploymentState entity", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [10,12,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 12, "subject": "Task 12: Add ConfigEdit audit entity", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [10,11,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 13, "subject": "Task 13: Add DataProtection keys table", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [10,11,12], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": "14a", "subject": "Task 14a: Add RowVersion to live-edit entities", "status": "completed", "classification": "standard", "estMinutes": 10, "parallelizableWith": [], "blockedBy": [13], "commit": "4bb4ad8"},
|
||||
{"id": "14b", "subject": "Task 14b: Decouple live-edit entities from ConfigGeneration", "status": "completed", "classification": "high-risk", "estMinutes": 30, "parallelizableWith": [], "blockedBy": ["14a"], "commit": "13d3aea"},
|
||||
{"id": "14c", "subject": "Task 14c: Obsolete GenerationApplier/Diff/SealedCache", "status": "completed", "classification": "high-risk", "estMinutes": 20, "parallelizableWith": [], "blockedBy": ["14b"], "commit": "1ddf8bb"},
|
||||
{"id": "14d", "subject": "Task 14d: Drop ClusterNode.RedundancyRole", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": ["14a","14b","14c"], "blockedBy": [13], "commit": "3c915e6"},
|
||||
{"id": "14e", "subject": "Task 14e: Delete ConfigGeneration + ClusterNodeGenerationState", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [], "blockedBy": ["14b","14c"], "commit": "e00f46d"},
|
||||
{"id": "14f", "subject": "Task 14f: V2HostingAlignment EF migration (consolidator)", "status": "completed", "classification": "high-risk", "estMinutes": 15, "parallelizableWith": [], "blockedBy": ["14a","14b","14c","14d","14e"], "commit": "605dbf3"},
|
||||
{"id": 15, "subject": "Task 15: Migrate-To-V2.ps1 idempotent script", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [16,17,18], "blockedBy": ["14f"], "commit": "c168c1c"},
|
||||
{"id": 16, "subject": "Task 16: Common types (CorrelationId, ExecutionId, NodeId, ...)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [17,18], "blockedBy": [9], "commit": "fee4a8c"},
|
||||
{"id": 17, "subject": "Task 17: Akka message contracts", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [16,18], "blockedBy": [16], "commit": "5d3a5a4"},
|
||||
{"id": 18, "subject": "Task 18: Common interfaces", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [16,17], "blockedBy": [16], "commit": "136234e"},
|
||||
{"id": 19, "subject": "Task 19: HOCON config", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [20,21,22], "blockedBy": [2], "commit": "3d0f4dc"},
|
||||
{"id": 20, "subject": "Task 20: AkkaHostedService implementation", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [19,21,22], "blockedBy": [2,18], "commit": "f184f8e"},
|
||||
{"id": 21, "subject": "Task 21: Role parser from OTOPCUA_ROLES env", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [19,20,22], "blockedBy": [2], "commit": "dfb0636"},
|
||||
{"id": 22, "subject": "Task 22: ClusterRoleInfo implementation", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [19,20,21], "blockedBy": [18,20], "commit": "c217c49"},
|
||||
{"id": 23, "subject": "Task 23: Cluster test project + tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [19,20,21,22], "commit": "e0b6d56"},
|
||||
{"id": 24, "subject": "Task 24: Move LdapAuthService into OtOpcUa.Security", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [25], "blockedBy": [3], "commit": "567b8ca"},
|
||||
{"id": 25, "subject": "Task 25: JwtTokenService", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [24], "blockedBy": [3], "commit": "93316e3"},
|
||||
{"id": 26, "subject": "Task 26: Cookie+JWT hybrid AddOtOpcUaAuth extension", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [27,28], "blockedBy": [13,24,25], "commit": "207fc6a"},
|
||||
{"id": 27, "subject": "Task 27: /auth/login, /auth/ping, /auth/token endpoints", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [26,28], "blockedBy": [24,25], "commit": "8be84ba"},
|
||||
{"id": 28, "subject": "Task 28: CookieAuthenticationStateProvider for Blazor", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [26,27], "blockedBy": [25], "commit": "e38f22e"},
|
||||
{"id": 29, "subject": "Task 29: Security test project + tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [24,25,26,27,28], "commit": "38ea0c5"},
|
||||
{"id": 30, "subject": "Task 30: ConfigPublishCoordinator happy path", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [32,33,34,35], "blockedBy": [4,17,18,10,11], "commit": "62e12da"},
|
||||
{"id": 31, "subject": "Task 31: Coordinator timeout + failover recovery", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [32,33,34,35], "blockedBy": [30], "commit": "f193872"},
|
||||
{"id": 32, "subject": "Task 32: AdminOperationsActor + StartDeployment", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,33,34,35], "blockedBy": [4,17,18,10,12], "commit": "ef683f5"},
|
||||
{"id": 33, "subject": "Task 33: AuditWriterActor batched idempotent insert", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,32,34,35], "blockedBy": [4,17], "commit": "23f669c"},
|
||||
{"id": 34, "subject": "Task 34: FleetStatusBroadcaster", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,32,33,35], "blockedBy": [4,17], "commit": "dd122c4"},
|
||||
{"id": 35, "subject": "Task 35: RedundancyStateActor + ServiceLevelCalculator", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [30,31,32,33,34], "blockedBy": [4,17,18], "commit": "6b37f99"},
|
||||
{"id": 36, "subject": "Task 36: Singleton registration extension (admin role)", "status": "completed", "classification": "standard", "estMinutes": 4, "parallelizableWith": [], "blockedBy": [30,31,32,33,34,35], "commit": "52bf4b3"},
|
||||
{"id": 37, "subject": "Task 37: DriverHostActor scaffolding + PreStart recovery", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43,44], "blockedBy": [5,17,18,11], "commit": "ed13013"},
|
||||
{"id": 38, "subject": "Task 38: DriverHostActor DispatchDeployment handler", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43,44], "blockedBy": [37], "commit": "ed13013"},
|
||||
{"id": 39, "subject": "Task 39: DriverHostActor stale-config fallback", "status": "completed", "classification": "standard", "estMinutes": 4, "parallelizableWith": [41,42,43,44], "blockedBy": [38], "commit": "ed13013"},
|
||||
{"id": 40, "subject": "Task 40: Runtime test project bootstrap", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [], "blockedBy": [37,38,39], "commit": "ed13013"},
|
||||
{"id": 41, "subject": "Task 41: DriverInstanceActor state machine", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [42,43,44], "blockedBy": [5,17,40], "commit": "64c627f"},
|
||||
{"id": 42, "subject": "Task 42: VirtualTagActor", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [41,43,44], "blockedBy": [5,17,40], "commit": "39729bf"},
|
||||
{"id": 43, "subject": "Task 43: ScriptedAlarmActor", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [41,42,44], "blockedBy": [5,17,40], "commit": "95ef533"},
|
||||
{"id": 44, "subject": "Task 44: OpcUaPublishActor on synchronized dispatcher", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43], "blockedBy": [5,6,17,19,40], "commit": "e115f13"},
|
||||
{"id": 45, "subject": "Task 45: HistorianAdapter + PeerOpcUaProbe + DbHealthProbe actors", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [37,40], "commit": "28639cb"},
|
||||
{"id": 46, "subject": "Task 46: Extract OpcUaApplicationHost + Phase7Composer", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [6], "commit": "2877a88"},
|
||||
{"id": 47, "subject": "Task 47: Phase7Composer purity + property tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [48,49,50,51,52], "blockedBy": [46], "commit": "b7c117a"},
|
||||
{"id": 48, "subject": "Task 48: Move Blazor components into AdminUI library", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [47], "blockedBy": [7], "commit": "1a067e6"},
|
||||
{"id": 49, "subject": "Task 49: Move SignalR hubs and rewire to FleetStatusBroadcaster", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [50,51,52], "blockedBy": [34,48], "commit": "26d8f2f"},
|
||||
{"id": 50, "subject": "Task 50: IAdminOperationsClient via ClusterSingletonProxy", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,51,52], "blockedBy": [18,32,48], "commit": "f022499"},
|
||||
{"id": 51, "subject": "Task 51: Replace DriverDiagnosticsClient with IFleetDiagnosticsClient", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,50,52], "blockedBy": [18,48], "commit": "b83f099"},
|
||||
{"id": 52, "subject": "Task 52: Drift indicator + Deploy button UI", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,50,51], "blockedBy": [50,48], "commit": "f167808"},
|
||||
{"id": 53, "subject": "Task 53: Host Program.cs role-gated startup", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [54,55], "blockedBy": [8,15,20,21,22,26,36,40,45,46,48,49], "commit": "e2b357f"},
|
||||
{"id": 54, "subject": "Task 54: Health endpoints + appsettings layout", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [53,55], "blockedBy": [8,22], "commit": "fa1d685"},
|
||||
{"id": 55, "subject": "Task 55: Mac dev mode + DEV-STUB drivers", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [53,54], "blockedBy": [41], "commit": "8b4de80"},
|
||||
{"id": 56, "subject": "Task 56: Delete OtOpcUa.Server + OtOpcUa.Admin projects", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [53,54,55], "commit": "76310b8"},
|
||||
{"id": 57, "subject": "Task 57: Build & test green check", "status": "completed", "classification": "trivial", "estMinutes": 3, "parallelizableWith": [], "blockedBy": [56], "commit": "76310b8"},
|
||||
{"id": 58, "subject": "Task 58: 2-node integration test harness", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [57], "commit": "d6fac2d", "deviation": "Also consolidated to a single Akka.Hosting ActorSystem — Program.cs ran two competing ActorSystems (custom AkkaHostedService + Akka.Hosting AddAkka). Cluster singletons landed on the bare one. Fixed in this commit; AkkaHostedService.cs deleted. docker-compose.yml (SQL+OpenLDAP for real local runs) deferred — harness uses EF in-memory."},
|
||||
{"id": 59, "subject": "Task 59: Deploy + failover integration tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [60], "blockedBy": [58], "commit": "5cfbe8b", "deviation": "Happy-path + idempotency landed. Failover scenarios (kill-mid-apply, split-brain, restart-during-deploy) deferred as F22 — they need node-down/restart primitives on the harness. Two production bugs fixed in this commit: (1) coordinator missing DPS subscription for ACKs, (2) NodeId collision on shared loopback host."},
|
||||
{"id": 60, "subject": "Task 60: OPC UA dual-endpoint + ServiceLevel tests", "status": "pending", "classification": "standard", "estMinutes": 5, "parallelizableWith": [59], "blockedBy": [58]},
|
||||
{"id": 61, "subject": "Task 61: E2E test infrastructure + GitHub Actions CI", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [59,60], "commit": "253fb60", "deviation": "CI workflow files landed but E2E test project (tests/Server/ZB.MOM.WW.OtOpcUa.E2ETests) deferred — it lands when F10/F11/F12 wire enough engine for an end-to-end round-trip to be meaningful. The E2E workflow runs against the docker-dev fleet but its --filter Category=E2E currently matches zero tests."},
|
||||
{"id": 62, "subject": "Task 62: Rewrite Install-Services.ps1", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [63,64,65], "blockedBy": [53], "commit": "e40615d"},
|
||||
{"id": 63, "subject": "Task 63: Traefik config + docker-dev compose", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,64,65], "blockedBy": [53], "commit": "7e3b56c", "deviation": "Untested on macOS (no local Docker). Compose file should work — exercise + adjust on first run against a real Docker host."},
|
||||
{"id": 64, "subject": "Task 64: Update existing docs (Redundancy, ServiceHosting, security)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,63,65], "blockedBy": [57], "commit": "3c3fef9", "deviation": "Redundancy.md + ServiceHosting.md full rewrites. security.md v2 banner only — full per-section rewrite waits for F15 (Admin pages migration) since security.md references many pages that will move. README.md platform-overview updated."},
|
||||
{"id": 65, "subject": "Task 65: New v2 docs (Architecture-v2, Cluster, ControlPlane, Runtime)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,63,64], "blockedBy": [57], "commit": "1689901"},
|
||||
{"id": "F1", "subject": "Follow-up: AuthEndpoints integration tests against fused Host", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": ["F2"], "blockedBy": [53], "commit": "463512d", "origin": "Deviation from Task 29 (commit 38ea0c5) — deferred until Task 53 wires AddOtOpcUaAuth/MapOtOpcUaAuth in Program. Add WebApplicationFactory<OtOpcUa.Host.Program> tests for /auth/login (204/401/503), /auth/ping (401/200), /auth/token (200+JWT), /auth/logout (204+cookie clear) using a stub ILdapAuthService.", "deviation": "Used HostBuilder + TestServer directly (Security.Tests/AuthEndpointsIntegrationTests) instead of WebApplicationFactory<Program> — Host needs Akka cluster bootstrap that's out of scope for this contract test. Cluster-mode auth coverage belongs in Task 58."},
|
||||
{"id": "F2", "subject": "Follow-up: Replace JwtBearer BuildServiceProvider antipattern with IPostConfigureOptions", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": ["F1"], "blockedBy": [], "commit": "45a8c79", "origin": "Deviation from Task 26 (commit 207fc6a) — AddOtOpcUaAuth uses services.BuildServiceProvider().CreateScope() inside .AddJwtBearer lambda (ASP0000). Refactor to IPostConfigureOptions<JwtBearerOptions> so validation parameters resolve lazily from the real request provider."},
|
||||
{"id": "F3", "subject": "Follow-up: Add EventId unique column to ConfigAuditLog for cross-restart audit idempotency", "status": "completed", "classification": "small", "estMinutes": 15, "parallelizableWith": ["F4"], "blockedBy": [], "commit": "f57f61d", "origin": "Deviation from Task 33 — AuditWriterActor only dedups in-buffer; ConfigAuditLog lacks EventId column so a duplicate AuditEvent that arrives after a flush becomes a duplicate row. Add nullable EventId Guid + filtered unique index, migration, and refactor AuditWriterActor.WrapDetails away."},
|
||||
{"id": "F4", "subject": "Follow-up: Harden AuditWriterActor.WrapDetails JSON synthesis with System.Text.Json", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": ["F3"], "blockedBy": [], "commit": "f57f61d", "deviation": "Moot — F3 deleted WrapDetails entirely (EventId/CorrelationId now live in dedicated columns).", "origin": "Self-review of Task 33 — WrapDetails uses string concat; malformed caller DetailsJson would produce invalid JSON and trip the CK_ConfigAuditLog_DetailsJson_IsJson constraint, killing the entire flush batch. Discard this task if F3 lands first (F3 removes WrapDetails entirely)."},
|
||||
{"id": "F5", "subject": "Follow-up: ConfigPublishCoordinator multi-node happy-path test", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "5cfbe8b", "deviation": "Delivered by Task 59 — DeployHappyPathTests.StartDeployment_seals_after_both_nodes_apply exercises the exact 'dispatch to N driver nodes, all ack, seals' flow via the real 2-node TwoNodeClusterHarness rather than a multi-system TestKit. Cleaner because it tests the production code path end-to-end.", "origin": "Self-review of Task 30 — single-ActorSystem TestKit can't simulate the plan's 'dispatch to N driver nodes, all ack, seals' happy path because DiscoverDriverNodes() needs real cluster membership. Add a multi-system test (two ActorSystems joined into one cluster, driver-role on the second)."},
|
||||
{"id": "F6", "subject": "Follow-up: RedundancyStateActor publisher abstraction so tests don't need DPS bootstrap", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": [], "blockedBy": [], "commit": "dfc143c", "origin": "Self-review of Task 35 — RedundancyStateActorTests are skipped because single-node DistributedPubSub bootstrap is unreliable in TestKit. Inject an Action<object> broadcast so tests can replace it with a probe; un-skip both tests."},
|
||||
{"id": "F7", "subject": "Follow-up: DriverInstanceActor full engine wiring (subscriptions, writes, ApplyDelta diff)", "status": "completed", "classification": "standard", "estMinutes": 45, "parallelizableWith": [], "blockedBy": [44], "origin": "Self-review of Task 41 — subscription publishing, ApplyDelta diffing, bad-quality-on-disconnect, write path, and supervisor backoff are stubbed. Wire after OpcUaPublishActor lands.", "shipped": "All three pieces landed: (1) spawn lifecycle in DriverHostActor (DriverSpawnPlanner + IDriverFactory seam) — da14149, (2) ISubscribable wiring + OPC UA status-code → OpcUaQuality severity-bit mapping + DetachSubscription on disconnect/PostStop, (3) IWritable.WriteAsync write path with 5s timeout, status-code bubble-up, and AttributeValuePublished published to parent on every OnDataChange — both shipped in the F7-residual batch. Host DI binding (DriverFactoryBootstrap registers AbCip/AbLegacy/FOCAS/Galaxy/Modbus/S7/TwinCAT factories) lives in src/Server/ZB.MOM.WW.OtOpcUa.Host/Drivers/."},
|
||||
{"id": "F8", "subject": "Follow-up: VirtualTagActor engine wiring (compile expression, subscribe deps, publish result)", "status": "partial", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 42 — VirtualTagEngine.Evaluate not called; DependencyValueChanged just buffers.", "shipped": "(1) IVirtualTagEvaluator seam + NullVirtualTagEvaluator default. VirtualTagActor calls evaluator on DependencyValueChanged, dedupes unchanged results, emits EvaluationResult to parent, publishes Warning ScriptLogEntry on failure. (2) DependencyMuxActor in Runtime fans out DriverInstanceActor.AttributeValuePublished from DriverHostActor through to interested VirtualTagActor subscribers. VirtualTagActor takes dependencyRefs + mux ActorRef in Props, registers interest in PreStart, unregisters in PostStop. WithOtOpcUaRuntimeActors spawns the mux + threads it into DriverHostActor. Production binding to Core.VirtualTags.VirtualTagEngine (expression compile + dep extraction) still TODO — split as F8b."},
|
||||
{"id": "F9", "subject": "Follow-up: ScriptedAlarmActor engine wiring + state persistence", "status": "partial", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 43 — AlarmConditionService not called; PreRestart persistence to ScriptedAlarmState DB not wired; HistorianAdapter rows not emitted.", "shipped": "(1) IScriptedAlarmEvaluator seam + NullScriptedAlarmEvaluator default. ScriptedAlarmActor takes AlarmConfig (id/name/path/severity/predicate), evaluates on DependencyValueChanged, publishes AlarmTransitionEvent + ScriptLogEntry on every transition. (2) IAlarmActorStateStore seam in Commons.Engines + NullAlarmActorStateStore default + EfAlarmActorStateStore production adapter over the ScriptedAlarmState entity. ScriptedAlarmActor PreStart loads + restores; every Transition fires a fire-and-forget save with lastAckUser. Predicate binding to Core.ScriptedAlarms.ScriptedAlarmEngine still TODO — split as F9b."},
|
||||
{"id": "F10", "subject": "Follow-up: OpcUaPublishActor SDK integration (address-space writes + ServiceLevel + RebuildAddressSpace)", "status": "partial", "classification": "high-risk", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [47], "origin": "Self-review of Task 44 — SDK calls stubbed; counters only. Wire after Phase 7 OpcUaServer extraction.", "shipped": "(1) IOpcUaAddressSpaceSink + IServiceLevelPublisher seams in Commons.OpcUa with Null* defaults. OpcUaPublishActor routes through the sink, dedupes ServiceLevelChanged, subscribes to redundancy-state DPS topic, maps redundancy snapshot to a coarse ServiceLevel (Primary+leader=240, Primary=200, Secondary=100, Detached=0). (2) OtOpcUaNodeManager (CustomNodeManager2) + OtOpcUaSdkServer (StandardServer subclass) + SdkAddressSpaceSink in OpcUaServer — lazy variable creation on first WriteValue, WriteAlarmState shape, RebuildAddressSpace tear-down. Variable updates propagate via ClearChangeMasks so subscribed OPC UA clients see them. Tests boot a real StandardServer + verify sink writes show up in the manager. Production wiring through OpcUaApplicationHost.StartAsync (default server = OtOpcUaSdkServer) + IServiceLevelPublisher SDK binding + #109 OpcUaPublishActor→Phase7Applier integration are the remaining pieces."},
|
||||
{"id": "F11", "subject": "Follow-up: HistorianAdapterActor named-pipe IPC + SqliteStoreAndForwardSink wiring", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "6861381", "deviationNotes": "Reshaped HistorianAdapterActor around the existing IAlarmHistorianSink abstraction (alarm-event shape, not the original tag-history-row stub). Defaults to NullAlarmHistorianSink; production deployments wire SqliteStoreAndForwardSink + WonderwareHistorianClient via AddOtOpcUaRuntime overrides. Actor now exposes GetStatus returning HistorianSinkStatus for diagnostics. Named-pipe transport implementation lives unchanged in src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs — the actor is intentionally just a fire-and-forget bridge.", "origin": "Self-review of Task 45 — stub buffers in-memory; named-pipe + SQLite store-and-forward not wired."},
|
||||
{"id": "F12", "subject": "Follow-up: PeerOpcUaProbeActor real opc.tcp ping (replace Ok=true stub)", "status": "completed", "classification": "small", "estMinutes": 20, "parallelizableWith": [], "blockedBy": [], "commit": "b06e3ae", "deviation": "TCP-connect probe rather than full OPC UA Hello/Acknowledge handshake. Enough for the redundancy calc; deeper liveness signals can layer on later without changing the actor's contract.", "origin": "Self-review of Task 45 — RunProbe always returns Ok=true; replace with OPC UA Client connect."},
|
||||
{"id": "F13", "subject": "Follow-up: Full OpcUaApplicationHost extraction (security/alarms/history/observability)", "status": "partial", "classification": "high-risk", "estMinutes": 120, "parallelizableWith": [], "blockedBy": [], "commit": "36c4751-partial", "deviationNotes": "F13a (cert auto-creation) shipped in 36c4751. Remaining: endpoint-security wiring (SecurityProfileResolver into ServerConfiguration.SecurityPolicies), LDAP user-token validator (the OPC UA UserNameToken path; HTTP-layer LDAP auth is separate and already in OtOpcUa.Security), scripted-alarm node manager creation, history backend wiring, observability hooks (OpenTelemetry metrics + traces). These are gated by F10's OpcUaPublishActor SDK integration — until F10 lands, nothing instantiates OpcUaApplicationHost so the missing wiring is dead weight.", "origin": "Self-review of Task 46 — facade only boots ApplicationInstance + StandardServer. Legacy 391-line file pulls Server.Security/Alarms/History/Observability. Pull those into thin OpcUaServer interfaces."},
|
||||
{"id": "F14", "subject": "Follow-up: Migrate side-effecting Phase7Composer (EquipmentNodeWalker, trace logs, node cache)", "status": "partial", "classification": "standard", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 47 — pure version covers the projection. Walker + alarm sink registration + cache mutation stay in legacy until split into Phase7Applier.", "shipped": "Phase7Plan + Phase7Planner.Compute (pure diff over EquipmentNodes/DriverInstancePlans/ScriptedAlarmPlans by stable id, with Added/Removed/Changed lists). Phase7Applier consumes plan + IOpcUaAddressSpaceSink: drives RebuildAddressSpace on Equipment/Alarm topology change, writes inactive AlarmState for removed nodes, catches + logs sink faults. Driver-only changes correctly skip the rebuild (DriverHostActor's spawn-plan in Runtime handles those). Walker integration with the real SDK NodeManager is the remaining piece — split as F14b (consumes the existing EquipmentNodeWalker once F10b lands an SDK builder)."},
|
||||
{"id": "F15", "subject": "Follow-up: Migrate 47 legacy Admin Blazor components into AdminUI library", "status": "completed", "classification": "high-risk", "estMinutes": 180, "commit": "Phase A-D (read views) + F15.2 batches 1-4 (live-edit CRUD) + F15.3 (live alerts/script-log/CSV import/Monaco)", "deviationNotes": "All 4 phases of read-only views shipped: Phase A (shell/auth/fleet/hosts), B (cluster CRUD + Overview/Redundancy), C (Equipment/UNS/Namespaces/Drivers/Tags/ACLs), D (Audit/VirtualTags/ScriptedAlarms/Scripts/RoleGrants/Certificates/Reservations/AlarmsHistorian). Per Q1–Q5 of docs/v2/AdminUI-rebuild-plan.md: typed driver editors deferred, top-level VirtualTags/ScriptedAlarms kept (Q2 reversed for cross-cluster discoverability), routes-not-tabs adopted, fleet-wide LDAP→role map only, generic login errors. Live-edit forms (F15.2) and ScriptLog page (depends on F16 ScriptLogHub) are explicit follow-ups.", "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 48 — only MapAdminUI scaffold + 1 new page (Deployments). 47 pages stay in legacy Admin (accepted-broken) until Task 56."},
|
||||
{"id": "F16", "subject": "Follow-up: Bridge FleetStatusBroadcaster → SignalR hubs (FleetStatusHub / AlertHub / ScriptLogHub)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "f18c285", "deviation": "FleetStatusHub bridge landed. AlertHub + ScriptLogHub deferred — they need upstream message contracts that aren't defined yet (alerts emerge from F9 ScriptedAlarmActor, script logs from F8 VirtualTagActor).", "origin": "Self-review of Task 49 — hubs are passive Hub subclasses; the bridge from FleetStatusBroadcaster.broadcast → IHubContext is not wired."},
|
||||
{"id": "F17", "subject": "Follow-up: FleetDiagnosticsClient real Akka ActorSelection round-trip (GetDiagnosticsRequest)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "8f32b89", "origin": "Self-review of Task 51 — client returns an empty snapshot stub. Add GetDiagnosticsRequest contract + DriverHostActor handler + real Ask/Reply."},
|
||||
{"id": "F18", "subject": "Follow-up: Thread HttpContext.User.Identity.Name into Deployments page (createdBy)", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [], "commit": "b266f63", "origin": "Self-review of Task 52 — Deployments.razor hardcodes createdBy=\"(current user)\"; needs @inject AuthenticationStateProvider."},
|
||||
{"id": "F19", "subject": "Follow-up: RuntimeStartup extension for driver-role node-actor spawn", "status": "completed", "classification": "standard", "estMinutes": 20, "parallelizableWith": [], "blockedBy": [], "commit": "09d6676", "origin": "Self-review of Task 53 — only admin-role singletons are wired via WithOtOpcUaControlPlaneSingletons. Driver-role nodes need a parallel WithOtOpcUaRuntimeActors that spawns DriverHostActor."},
|
||||
{"id": "F20", "subject": "Follow-up: Wire DriverInstanceActor.ShouldStub() into DriverHostActor child spawn", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": ["F7"], "blockedBy": [], "origin": "Self-review of Task 55 — ShouldStub helper exists but nothing calls it. Folds into F7 when DriverHostActor learns to spawn DriverInstanceActor children.", "shipped": "DriverHostActor.SpawnChild now calls DriverInstanceActor.ShouldStub(type, _localRoles) and routes Windows-only driver types to the stub path on non-Windows / dev-role hosts. Verified by DriverHostActorReconcileTests.Galaxy_on_non_windows_is_stubbed_by_ShouldStub_check."},
|
||||
{"id": "F21", "subject": "Follow-up: docker-compose.yml for Host.IntegrationTests (real SQL Server + OpenLDAP)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "b0a2bb0", "deviationNotes": "Stack shipped (SQL on 14331, OpenLDAP on 3894). HarnessMode reads OTOPCUA_HARNESS_USE_SQL=1 / USE_LDAP=1 from env; SQL mode uses per-harness unique DB via EnsureCreated. Compose itself not local-validated — DESKTOP-6JL3KKO has no Docker per CLAUDE.md; CI on Linux will exercise the real path. The xunit test-trait split was punted — env vars are simpler and cover the same use case (one suite, two modes, no test-class duplication).", "origin": "Deviation from Task 58 — TwoNodeClusterHarness uses EF InMemoryDatabase + StubLdapAuthService. For Mac-friendly local runs against real SQL constraints + LDAP, add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/docker-compose.yml (SQL Server + OpenLDAP), wire EF SqlServer provider behind an env var (OTOPCUA_HARNESS_USE_SQL=1), and add a test trait so CI can run both modes."},
|
||||
{"id": "F22", "subject": "Follow-up: failover scenario integration tests (kill-mid-apply, split-brain, restart-during-deploy)", "status": "completed", "classification": "standard", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [], "commit": "cd5540c", "deviationNotes": "Shipped 3 scenarios on the existing 2-node harness: stop-shrinks, restart-rejoins-same-port, deploy-with-one-node-down. Split-brain via simulated partition deferred — Akka.Hosting + xunit don't expose transport-level interference cleanly. The graceful-shutdown + rejoin path is what production actually exercises; ungraceful kill-mid-apply non-deterministic under SBR's 15s stable-after.", "origin": "Deviation from Task 59 — happy-path + idempotency landed but design §8 cases 3-7 need controlled node-down primitives on TwoNodeClusterHarness (StopNodeAsync, RestartNodeAsync, PartitionBetweenAsync). Add those + 5 scenario tests."}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,308 @@
|
||||
# AdminUI — Driver-Specific Pages
|
||||
|
||||
**Status:** Design approved, ready for implementation planning
|
||||
**Date:** 2026-05-28
|
||||
**Branch:** `master` (work to land on a feature branch)
|
||||
|
||||
## 1. Motivation
|
||||
|
||||
Today the AdminUI has a single generic `DriverEdit.razor` page (`src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`, 323 lines) that edits every driver type via a raw JSON `DriverConfig` textarea. The page itself flags this as temporary:
|
||||
|
||||
> Per Q1 of the AdminUI rebuild plan, typed driver editors (Modbus, FOCAS) are deferred… lands in a Phase C.2 follow-up.
|
||||
|
||||
This design is that follow-up. Goals:
|
||||
|
||||
1. Replace the JSON blob with a **typed form per driver type** that exposes every supported configuration option.
|
||||
2. Add three driver-aware operator capabilities to each page: **Test Connect**, **live runtime status**, and a **driver-specific tag/address picker**.
|
||||
3. Add **Reconnect / Restart** controls on the status panel for authorized users.
|
||||
|
||||
## 2. Scope
|
||||
|
||||
All 9 driver types ship typed pages in this work:
|
||||
|
||||
```
|
||||
ModbusTcp, AbCip, AbLegacy, S7, TwinCat, FOCAS,
|
||||
OpcUaClient, Galaxy, Historian.Wonderware
|
||||
```
|
||||
|
||||
Each typed page exposes the full surface of its driver's options class — the JSON editor is retired; the typed form is the only way to edit driver config from the AdminUI.
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Project layout
|
||||
|
||||
```
|
||||
src/Drivers/
|
||||
ZB.MOM.WW.OtOpcUa.Driver.<Type>/ (runtime, unchanged behavior)
|
||||
ZB.MOM.WW.OtOpcUa.Driver.<Type>.Contracts/ NEW — POCO options + DataAnnotations only
|
||||
<Type>DriverOptions.cs (moved from runtime project)
|
||||
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/
|
||||
Components/Pages/Clusters/Drivers/ NEW folder
|
||||
DriverTypePicker.razor route: /clusters/{id}/drivers/new
|
||||
DriverEditRouter.razor route: /clusters/{id}/drivers/{instanceId}
|
||||
ModbusDriverPage.razor route: /clusters/{id}/drivers/new/modbus
|
||||
GalaxyDriverPage.razor route: /clusters/{id}/drivers/new/galaxy
|
||||
S7DriverPage.razor
|
||||
OpcUaClientDriverPage.razor
|
||||
AbCipDriverPage.razor
|
||||
AbLegacyDriverPage.razor
|
||||
TwinCatDriverPage.razor
|
||||
FocasDriverPage.razor
|
||||
HistorianWonderwareDriverPage.razor
|
||||
Components/Shared/Drivers/ NEW folder
|
||||
DriverFormShell.razor panel layout + Save/Cancel/Delete
|
||||
DriverIdentitySection.razor InstanceId, Name, Namespace, Enabled
|
||||
DriverResilienceSection.razor Polly overrides
|
||||
DriverStatusPanel.razor live status + Reconnect/Restart
|
||||
DriverTestConnectButton.razor per-driver-timeout probe
|
||||
DriverTagPicker.razor modal shell, hosts per-driver picker body
|
||||
Hubs/
|
||||
DriverStatusHub.cs NEW SignalR hub at /hubs/driverstatus
|
||||
DriverStatusSignalRBridge.cs NEW (mirrors FleetStatusSignalRBridge)
|
||||
```
|
||||
|
||||
### 3.2 Routing
|
||||
|
||||
- `/clusters/{ClusterId}/drivers` — existing `ClusterDrivers.razor` list, unchanged.
|
||||
- `/clusters/{ClusterId}/drivers/new` — new `DriverTypePicker.razor` (operator picks driver type).
|
||||
- `/clusters/{ClusterId}/drivers/new/{driverType}` — typed new-form for that type.
|
||||
- `/clusters/{ClusterId}/drivers/{DriverInstanceId}` — `DriverEditRouter.razor` reads the row's `DriverType`, dispatches to the right `*DriverPage` via `<DynamicComponent>` (no redirect flicker).
|
||||
|
||||
### 3.3 Schema source
|
||||
|
||||
Each driver's `Options` class moves to a new `Driver.<Type>.Contracts` csproj — POCO + `System.ComponentModel.DataAnnotations` attributes only, no NuGet references, no project references. The runtime driver project adds a `ProjectReference` to its contracts sibling and re-uses the same type (single source of truth, no `TypeForwardedTo` needed if the namespace is preserved). The AdminUI gains 9 `ProjectReference`s — all pure POCO, so no native deps (Galaxy COM, FOCAS native libs, OPC UA stack) leak into the AdminUI publish output.
|
||||
|
||||
Attributes used:
|
||||
|
||||
- `[Required]`, `[Range(...)]`, `[RegularExpression(...)]` — render as inputs + `<ValidationMessage>` via `DataAnnotationsValidator`.
|
||||
- `[Display(Name, Description, GroupName)]` — label, help-text under field, panel section.
|
||||
- `[DataType(DataType.Password)]` — render as `<InputText type="password">` (e.g. mxaccessgw API key).
|
||||
|
||||
The `*DriverPage.razor` files **explicitly** bind each field (no runtime reflection). Attributes drive labels/help/validation but not field discovery — this avoids the "metadata silently drifts from rendering" trap.
|
||||
|
||||
### 3.4 Persistence
|
||||
|
||||
`DriverInstance.DriverConfig` stays a JSON string column (no schema change). On save: typed form-model serialized via `System.Text.Json` against the driver's Options class. On load: row's JSON deserialized into the matching Options class with `JsonSerializerOptions { UnmappedMemberHandling = Skip }` so old/unknown fields are silently dropped on next save. Version skew is bounded by the fact that drivers ship as one host binary.
|
||||
|
||||
`RowVersion` optimistic concurrency unchanged from today's `DriverEdit.razor`.
|
||||
|
||||
## 4. Test Connect
|
||||
|
||||
### 4.1 Flow
|
||||
|
||||
```
|
||||
[Browser] [AdminUI server] [Cluster]
|
||||
|
||||
DriverGalaxyPage AdminProbeService AdminOperationsActor
|
||||
| | |
|
||||
|-- click TestConnect --->| |
|
||||
| |-- Ask<TestDriverConnect> --->|
|
||||
| | (driverType, configJson, |
|
||||
| | timeoutSecs) |
|
||||
| | |--> spawn transient probe actor
|
||||
| | | (resolves IDriverProbe by
|
||||
| | | driverType via DI)
|
||||
| | |<-- ProbeResult (ok, latencyMs)
|
||||
| |<-- TestDriverConnectResult --|
|
||||
|<-- green / red chip ----|
|
||||
```
|
||||
|
||||
### 4.2 Components
|
||||
|
||||
- **`IDriverProbe`** — interface in `Core.Abstractions` (or equivalent). One implementation per driver type, lives in the driver's **runtime** project. Reuses the existing `IHostConnectivityProbe` plumbing where present (FOCAS, TwinCAT confirmed). For drivers without one, the probe is a cheap subset of the real connect path: TCP `SocketAsyncOperations` for Modbus/AbCip/S7, session open+close for OpcUaClient, `MxCommand.Ping` for Galaxy. Probes **never write**.
|
||||
- **`TestDriverConnect` message** in `Commons/Messages/Admin` — `(string driverType, string configJson, TimeSpan timeout)`. Handler in `AdminOperationsActor`: resolves the right probe via keyed DI (`IServiceProvider.GetRequiredKeyedService<IDriverProbe>(driverType)`), deserializes JSON into the matching Options class, calls `probe.RunAsync(options, ct)`. Returns `TestDriverConnectResult(bool ok, string? message, TimeSpan? latency)`.
|
||||
- **`AdminProbeService`** (AdminUI side) — thin wrapper around the existing AdminOperationsActor bridge. Caller passes timeout; service enforces a 60s hard backstop.
|
||||
- **`<DriverTestConnectButton>`** — accepts driver type + `Func<string>` to build form JSON on-click. Renders button + inline result chip (auto-clears after 30s). Disabled while in-flight.
|
||||
|
||||
### 4.3 Timeout
|
||||
|
||||
Each driver's Options class exposes a `ProbeTimeout` (`TimeSpan` or `int Seconds`) with a driver-appropriate default — e.g. Modbus 5s, OpcUaClient 15s, Galaxy 30s. The button reads from the live form (not the persisted row), so an operator can override the timeout per probe attempt. Server-side max = 60s.
|
||||
|
||||
### 4.4 Safety
|
||||
|
||||
- Probe spawns a transient actor with the *form's* config — the live driver actor (using the *persisted* config) is untouched.
|
||||
- Probe never mutates the live driver or the database.
|
||||
- Probe inherits the user context via the existing AdminOperationsActor audit-log entry.
|
||||
|
||||
## 5. Live Status Panel
|
||||
|
||||
### 5.1 Flow
|
||||
|
||||
```
|
||||
DriverActor DriverStatusSignalRBridge DriverStatusPanel (browser)
|
||||
| ^ |
|
||||
|-- publishes |-- subscribed in |
|
||||
| DriverHealthChanged | OnInitializedAsync |
|
||||
| to event stream | with InstanceId filter |
|
||||
| | |
|
||||
| |-- pushes update -------------->|
|
||||
| |
|
||||
| |-- renders state chip,
|
||||
| | last-success, error count,
|
||||
| | Reconnect/Restart buttons
|
||||
```
|
||||
|
||||
### 5.2 Reused infrastructure
|
||||
|
||||
Driver actors already maintain `DriverHealth(state, lastSuccessUtc, lastError)` — confirmed in FOCAS (`FocasDriver.cs`) and TwinCAT. The bridge mirrors the existing `FleetStatusSignalRBridge` + `AlertSignalRBridge` pattern. SignalR hub uses the same cookie-auth as existing hubs.
|
||||
|
||||
### 5.3 New components
|
||||
|
||||
- **`DriverStatusHub`** — single method `JoinDriver(string driverInstanceId)`, adds connection to a per-instance group and immediately replies with the current snapshot.
|
||||
- **`DriverStatusSignalRBridge`** — subscribes to per-cluster driver-health event stream, fans out into SignalR groups keyed by `driverInstanceId`. Only running drivers publish; `Enabled=false` instances render "Disabled — not deployed" without subscribing.
|
||||
- **`<DriverStatusPanel>`** — props `DriverInstanceId`, `Enabled`. Opens hub on init, calls `JoinDriver`, registers `On<StatusSnapshot>("status", ...)`. Renders state chip (`Healthy` / `Connecting` / `Faulted` / `Unknown`) + last-success timestamp ("2s ago") + error count over last 5min + last error message (collapsed, expandable). Disposes hub on dispose.
|
||||
|
||||
### 5.4 Reconnect / Restart controls
|
||||
|
||||
Two buttons on the status panel:
|
||||
|
||||
- **Reconnect** — driver actor closes + reopens its transport, keeps actor alive. Fast, idempotent. No confirm dialog.
|
||||
- **Restart** — full actor stop + respawn, loses in-memory state. Slower, can interrupt active subscriptions. Confirm dialog required.
|
||||
|
||||
Both:
|
||||
|
||||
- Gated by authorization policy `DriverOperator` (mapped to an LDAP group via existing `Authentication.Ldap` config). **Hidden** (not just disabled) for unauthorized users — same approach as other AdminUI gated actions.
|
||||
- Dispatch `RestartDriver` / `ReconnectDriver` messages through `AdminOperationsActor`, which audit-logs each operation.
|
||||
- Show spinner + inline "Reconnecting…" chip; panel reflects new state via the SignalR push once health changes.
|
||||
- Disabled when `Enabled=false` (nothing to restart) and during any in-flight Test Connect on the same page.
|
||||
|
||||
### 5.5 Out of scope this PR
|
||||
|
||||
History graphs (latency/error rate over time), deep diagnostics (per-tag last values, queue depths), and per-driver bespoke controls beyond Reconnect/Restart — all follow-ups.
|
||||
|
||||
### 5.6 Edge cases
|
||||
|
||||
- **Driver not yet deployed** (row exists, `Enabled=true`, cluster hasn't picked it up) — panel shows "Awaiting deployment", `DriverHealth.Unknown`.
|
||||
- **Edit page open while driver is running** — status reflects deployed config, not the form. Banner: "Showing live status for the deployed config — your unsaved changes take effect after Save → next deploy cycle."
|
||||
- **Test Connect + live status** — probe runs in a transient actor (Section 4), live status reflects the persistent actor. Don't interfere.
|
||||
|
||||
## 6. Tag / Address Picker
|
||||
|
||||
A picker slot on each page, launched as a modal so the config form stays visible behind it.
|
||||
|
||||
### 6.1 Shared shell
|
||||
|
||||
`<DriverTagPicker>` — modal chrome + search box + "use this address" action that emits a string back to the parent (e.g. `4x0001` for Modbus, `ns=2;s=Channel.Device.Tag` for OPC UA). Where the picked address lands depends on context: from "create equipment / create tag" flow, pushed into that form; standalone, copy-to-clipboard.
|
||||
|
||||
### 6.2 Per-driver bodies — first pass (all static address builders)
|
||||
|
||||
| Driver | Picker body |
|
||||
|---|---|
|
||||
| Modbus | Register-type dropdown + offset spinner + length → `4x00001-4` |
|
||||
| AbCip | Tag name + element index, PLC-family hint from form |
|
||||
| AbLegacy | File type (N/B/F/I/O/S/T/C/R) + file number + element, PLC-family-aware |
|
||||
| S7 | Area (DB/M/I/Q) + db-number + offset + S7 type → `DB10.DBD20:REAL` |
|
||||
| TwinCat | ADS variable name (free-text + format hint) |
|
||||
| FOCAS | Parameter group dropdown + parameter ID; drives FOCAS function-code lookup |
|
||||
| OpcUaClient | Static helper (NodeId free-text) — **live browse deferred** |
|
||||
| Galaxy | Static helper (tag_name.AttributeName free-text) — **live browse deferred** |
|
||||
| Historian.Wonderware | Tag name + retrieval mode + interval |
|
||||
|
||||
### 6.3 Deferred to follow-up
|
||||
|
||||
- **OpcUaClient live browse** — open session against configured endpoint, walk address space, return NodeId. Reuses the existing `Client.CLI` browse path or calls the OPC UA stack inline. Requires endpoint config to be valid (Test Connect first).
|
||||
- **Galaxy live browse** — calls mxaccessgw's `GalaxyRepository.ListObjects` / `ListAttributes` via gRPC. Returns `tag_name.AttributeName`. Reuses `IGalaxyHierarchySource`.
|
||||
- **Historian.Wonderware tag list** — pull from historian's tag store.
|
||||
|
||||
The picker slot is wired so swapping a static builder for a live browser later is a 1-component swap, not a page rewrite.
|
||||
|
||||
## 7. Error Handling
|
||||
|
||||
| Failure | Surface |
|
||||
|---|---|
|
||||
| Invalid form input | `DataAnnotationsValidator` + per-field `<ValidationMessage>`; Save disabled. |
|
||||
| `DbUpdateConcurrencyException` | Red banner — "Another user changed this driver instance, reload before re-applying." (matches existing pattern) |
|
||||
| FK violation (Namespace deleted while edit open) | Catch `DbUpdateException` — "Namespace `<id>` no longer exists in this cluster — pick another or recreate it." |
|
||||
| Probe — driver-side exception | Probe actor catches, returns `(false, ex.Message, null)`. Red chip with message. Full stack to Serilog with audit context. |
|
||||
| Probe — timeout | `(false, "Probe timed out after {n}s", null)`. Server-side 60s backstop. |
|
||||
| Probe — DI lookup fails (unknown driver type) | Defensive — `(false, "No probe registered for driver type '{type}'", null)`. Error-level log. |
|
||||
| SignalR disconnect | "Reconnecting…" chip + SignalR auto-reconnect. Stale snapshot dimmed after 30s. |
|
||||
| Reconnect/Restart on stopped driver | "Driver is not running on any node". Button re-enables. |
|
||||
| Authorization denied | Reconnect/Restart buttons hidden for unauthorized users. |
|
||||
| Corrupted `DriverConfig` JSON on row load | Yellow banner — "Saved config could not be parsed against the current schema; falling back to defaults. Save will overwrite." Original JSON preserved in banner for copy-paste. |
|
||||
|
||||
## 8. Testing
|
||||
|
||||
### 8.1 Unit tests (`tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/` — extend existing project, or create if absent)
|
||||
|
||||
- `DriverPageFormSerializationTests` — 9 drivers × round-trip Options ↔ JSON ↔ form ↔ DB row. Asserts no loss for known fields, unknown fields dropped silently.
|
||||
- `DriverTestConnectButtonTests` — render tests: enabled/disabled states, timeout behavior, result chip.
|
||||
- `DriverStatusPanelTests` — render snapshots for each `DriverState`, disabled mode, stale-data dim.
|
||||
- `DriverRestartReconnectAuthorizationTests` — buttons hidden without `DriverOperator` policy.
|
||||
- Address-builder unit tests per driver — 9 small suites covering canonical address formats.
|
||||
|
||||
### 8.2 Integration tests (`tests/Server/.../IntegrationTests/`)
|
||||
|
||||
- `DriverTestConnectE2eTests` — Modbus + AbCip + S7 against Docker fixtures (`lmxopcua-fix up modbus` etc.). Green probe vs sim, red probe vs wrong port, timeout vs black-holed IP.
|
||||
- `DriverReconnectE2eTests` — start a driver, click Reconnect, assert `Connecting → Healthy` transition within N seconds.
|
||||
- `DriverStatusHubE2eTests` — open hub, force state change, assert push arrives within 1s.
|
||||
|
||||
### 8.3 Manual smoke (run before PR ship)
|
||||
|
||||
Operator on the dev VM with Docker fixtures available:
|
||||
|
||||
1. Pre-flight:
|
||||
- `lmxopcua-fix up modbus standard` — Modbus sim running on `10.100.0.35:5020`.
|
||||
- AdminUI deployed and reachable.
|
||||
- LDAP user has the `DriverOperator` (or `FleetAdmin`) role.
|
||||
|
||||
2. Type picker:
|
||||
- Navigate to `/clusters/<id>/drivers/new`. Verify 9 driver-type cards render.
|
||||
- Click "ModbusTcp". Verify the typed form opens on `/clusters/<id>/drivers/new/modbustcp`.
|
||||
|
||||
3. Test Connect (form-driven, no save):
|
||||
- Fill in Host=`10.100.0.35`, Port=`5020`, leave defaults otherwise.
|
||||
- Click "Test Connect". Verify green chip + latency < 100ms.
|
||||
- Change port to `9999`. Click again. Verify red chip with "ConnectionRefused" or similar.
|
||||
- Change host to `1.2.3.4`. Click again. Within (default 5s) the chip shows "Probe timed out after 5s".
|
||||
|
||||
4. Save + edit:
|
||||
- Set valid endpoint back. Save. Verify redirect to `/clusters/<id>/drivers`.
|
||||
- Open the just-saved instance. Verify the typed form pre-populates correctly.
|
||||
|
||||
5. Live status panel:
|
||||
- In a second browser tab, open the same driver's edit page. Confirm the `DriverStatusPanel` renders state + last-update.
|
||||
- Stop the Modbus sim (`lmxopcua-fix down modbus`). Within ~30s, verify the panel transitions Healthy → Reconnecting / Faulted (depending on driver state).
|
||||
- Bring the sim back up (`lmxopcua-fix up modbus standard`). Verify Healthy is restored.
|
||||
|
||||
6. Reconnect / Restart:
|
||||
- Click "Reconnect" on the status panel. Verify a brief "Reconnecting…" chip + a Healthy state push within 5s.
|
||||
- Click "Restart". Confirm in the dialog. Verify the actor restarts (full state transition).
|
||||
- Verify both buttons are HIDDEN for an unauthorized user (LDAP user without `DriverOperator` role).
|
||||
|
||||
7. Address picker:
|
||||
- Click "Pick address" on the Modbus page. Verify the modal opens.
|
||||
- Builder: select Holding + offset=10 + length=2. Verify the chip shows `4x00010-2`. Click "Use this address" — verify it surfaces in the parent page.
|
||||
- Close the modal. Repeat for one other driver type (e.g. S7) to confirm cross-driver wiring.
|
||||
|
||||
8. Other 8 driver types — smoke each page renders:
|
||||
- Repeat steps 2–4 for each remaining driver type. For Galaxy, the Test Connect uses the mxaccessgw endpoint; for OPC UA, an `opc.tcp://` endpoint.
|
||||
|
||||
If any step fails, record the failure mode + Razor / actor log excerpts and reopen for fix before PR ship.
|
||||
|
||||
### 8.4 bUnit harness
|
||||
|
||||
If the AdminUI tests project doesn't already use bUnit, render tests downgrade to logic-only tests on the `@code { }` block; Razor markup is covered by integration tests. Decision deferred to implementation plan.
|
||||
|
||||
## 9. Migration / Sequencing
|
||||
|
||||
Incremental — driver-by-driver swap-over. Each step compile-clean and shippable on its own:
|
||||
|
||||
1. Land 9 Contracts projects + move Options classes. No UI changes.
|
||||
2. Land shared section components (`DriverIdentitySection`, `DriverResilienceSection`, `DriverFormShell`). Wire into existing `DriverEdit.razor` first so they're tested in place.
|
||||
3. Land `DriverTypePicker` + `DriverEditRouter` + `<DynamicComponent>` dispatch.
|
||||
4. Land driver-specific pages one at a time. After each, route list-page links for that driver type only to the new page; leave others on generic editor.
|
||||
5. Delete the generic `DriverEdit.razor` + its route once all 9 typed pages exist.
|
||||
6. Land `DriverStatusHub` + bridge + `<DriverStatusPanel>` (read-only first).
|
||||
7. Land `<DriverTestConnectButton>` + `IDriverProbe` impls + AdminOperationsActor handler.
|
||||
8. Land Reconnect/Restart on the status panel with `DriverOperator` policy.
|
||||
9. Land 9 static address builders inside `<DriverTagPicker>`.
|
||||
|
||||
## 10. Out of scope (follow-ups)
|
||||
|
||||
- Live tag browse for OpcUaClient + Galaxy (Section 6.3).
|
||||
- Historian.Wonderware tag list pulled from store.
|
||||
- Status panel history graphs + per-tag diagnostics (Section 5.5).
|
||||
- Per-driver bespoke controls beyond Reconnect/Restart.
|
||||
- bUnit setup if not already present (Section 8.4) — decide during implementation planning.
|
||||
@@ -0,0 +1,840 @@
|
||||
# AdminUI Driver-Specific Pages Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Replace `DriverEdit.razor` (generic JSON editor) with typed per-driver pages for all 9 driver types, each with Test Connect, live runtime status panel (Reconnect/Restart), and a driver-specific tag/address picker.
|
||||
|
||||
**Architecture:** 9 new `Driver.<Type>.Contracts` csprojs hold the `Options` POCOs (moved from runtime projects). AdminUI gains 9 thin `ProjectReference`s — no native deps leak. 9 typed `*DriverPage.razor` components share `<DriverFormShell>` + section/picker/status/test components in `Components/Shared/Drivers/`. Test Connect routes through `AdminOperationsActor` to per-driver `IDriverProbe` impls. Live status uses an Akka DistributedPubSub bridge → SignalR hub → Blazor panel (same pattern as the existing `FleetStatusSignalRBridge`).
|
||||
|
||||
**Tech Stack:** .NET 10 Blazor Server, EF Core (SQL Server), Akka.NET (cluster + DistributedPubSub), SignalR, OPC Foundation OPC UA .NET Standard stack, xUnit + Shouldly.
|
||||
|
||||
**Authoritative design:** `docs/plans/2026-05-28-adminui-driver-pages-design.md`. Re-read its sections when a task references them.
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 — Preconditions
|
||||
|
||||
### Task 0.1: Create AdminUI test project (currently absent)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (every later test task depends on this)
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/ZB.MOM.WW.OtOpcUa.AdminUI.Tests.csproj`
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/_PlaceholderTests.cs`
|
||||
- Modify: `ZB.MOM.WW.OtOpcUa.slnx` (add the new project)
|
||||
|
||||
**Step 1:** Create csproj targeting `net10.0`, `<IsPackable>false</IsPackable>`. Package refs: `xunit`, `xunit.runner.visualstudio`, `Microsoft.NET.Test.Sdk`, `Shouldly`. Project ref: `..\..\..\src\Server\ZB.MOM.WW.OtOpcUa.AdminUI\ZB.MOM.WW.OtOpcUa.AdminUI.csproj`. Copy structure from a peer like `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/*.csproj`.
|
||||
|
||||
**Step 2:** Add a single `_PlaceholderTests.cs` with one passing fact so the project compiles + the test runner discovers something.
|
||||
|
||||
**Step 3:** Add `<Solution><Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/ZB.MOM.WW.OtOpcUa.AdminUI.Tests.csproj"/></Solution>` to `ZB.MOM.WW.OtOpcUa.slnx` (match the existing element style).
|
||||
|
||||
**Step 4:** Run `dotnet build tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests` then `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests`. Both succeed.
|
||||
|
||||
**Step 5:** Commit. `test(adminui): scaffold AdminUI.Tests project`
|
||||
|
||||
**Decision (deferred from design §8.4):** *no bUnit*. All Razor render tests degrade to logic-only tests on `@code { }` blocks. Razor markup is covered by the integration tests in Phase 6/7/8.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Contracts Projects (Driver Options → POCO-only siblings)
|
||||
|
||||
**Pattern (apply to every task in this phase):**
|
||||
|
||||
For driver type `<Type>` (folder `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.<Type>/`, options file `<Type>DriverOptions.cs`):
|
||||
|
||||
1. Create new csproj `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.<Type>.Contracts/ZB.MOM.WW.OtOpcUa.Driver.<Type>.Contracts.csproj`:
|
||||
```xml
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
<!-- NO ProjectReference. NO PackageReference. Pure POCO. -->
|
||||
</Project>
|
||||
```
|
||||
2. `git mv` the options file from the runtime project into the contracts project. Preserve namespace (`ZB.MOM.WW.OtOpcUa.Driver.<Type>` → keep the same `namespace ZB.MOM.WW.OtOpcUa.Driver.<Type>` declaration so consumers don't change). If the options file `using`s anything that isn't `System.*` or `System.ComponentModel.DataAnnotations`, strip that dep — most options classes are pure POCO already; if any pulls a runtime-only type, leave a `// TODO: extract <type> too` and capture it in a follow-up task here.
|
||||
3. Add `<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.<Type>.Contracts\ZB.MOM.WW.OtOpcUa.Driver.<Type>.Contracts.csproj" />` to the runtime project's csproj `<ItemGroup>`.
|
||||
4. Add the new contracts csproj to `ZB.MOM.WW.OtOpcUa.slnx`.
|
||||
5. `dotnet build src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.<Type>` → clean. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → clean.
|
||||
6. Commit. `refactor(driver-<type>): extract <Type>DriverOptions to .Contracts`
|
||||
|
||||
### Task 1.1: Modbus contracts
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1.2 – 1.9 (different folders, different csprojs)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts.csproj`
|
||||
- Move: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverOptions.cs` → `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts/ModbusDriverOptions.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ZB.MOM.WW.OtOpcUa.Driver.Modbus.csproj` (add ProjectReference)
|
||||
- Modify: `ZB.MOM.WW.OtOpcUa.slnx`
|
||||
|
||||
Follow the Phase 1 pattern above.
|
||||
|
||||
### Task 1.2: AbCip contracts
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1.1, 1.3 – 1.9
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Contracts/ZB.MOM.WW.OtOpcUa.Driver.AbCip.Contracts.csproj`
|
||||
- Move: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverOptions.cs` → contracts project
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/ZB.MOM.WW.OtOpcUa.Driver.AbCip.csproj`, `ZB.MOM.WW.OtOpcUa.slnx`
|
||||
|
||||
### Task 1.3: AbLegacy contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Contracts/*`, move `AbLegacyDriverOptions.cs`, update `Driver.AbLegacy.csproj` + slnx.
|
||||
|
||||
### Task 1.4: S7 contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7.Contracts/*`, move `S7DriverOptions.cs`, update `Driver.S7.csproj` + slnx.
|
||||
|
||||
### Task 1.5: TwinCAT contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Contracts/*`, move `TwinCATDriverOptions.cs`, update `Driver.TwinCAT.csproj` + slnx.
|
||||
|
||||
### Task 1.6: FOCAS contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Contracts/*`, move `FocasDriverOptions.cs`, update `Driver.FOCAS.csproj` + slnx.
|
||||
|
||||
### Task 1.7: OpcUaClient contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Contracts/*`, move `OpcUaClientDriverOptions.cs`, update `Driver.OpcUaClient.csproj` + slnx.
|
||||
|
||||
### Task 1.8: Galaxy contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts/*`, move `Config/GalaxyDriverOptions.cs` (place at root of contracts project — drop the `Config/` subdir), update `Driver.Galaxy.csproj` + slnx.
|
||||
|
||||
### Task 1.9: Wonderware Historian client contracts
|
||||
**Classification:** small · **Estimated implement time:** ~3 min · **Parallelizable with:** 1.1–1.9 (except itself)
|
||||
**Files:** Create `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/*`, move `WonderwareHistorianClientOptions.cs`, update `Driver.Historian.Wonderware.Client.csproj` + slnx.
|
||||
|
||||
### Task 1.10: Validate the full solution + add ProbeTimeout property to each Options class
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on 1.1–1.9)
|
||||
|
||||
**Files:**
|
||||
- Modify: each of the 9 `*DriverOptions.cs` files just moved into the contracts projects.
|
||||
|
||||
**Step 1:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — clean.
|
||||
|
||||
**Step 2:** Add a `ProbeTimeout` property to each Options class with a driver-appropriate default:
|
||||
|
||||
```csharp
|
||||
/// <summary>Timeout for the AdminUI Test Connect probe. Server-side max = 60s.</summary>
|
||||
[Display(Name = "Probe timeout (seconds)", Description = "Connection test timeout. Default {n}s.", GroupName = "Diagnostics")]
|
||||
[Range(1, 60)]
|
||||
public int ProbeTimeoutSeconds { get; init; } = <default>;
|
||||
```
|
||||
|
||||
Defaults: Modbus 5, AbCip 5, AbLegacy 5, S7 5, TwinCAT 10, FOCAS 10, OpcUaClient 15, Galaxy 30, Wonderware Historian 15.
|
||||
|
||||
**Step 3:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — clean (any driver runtime code that constructs the Options via positional record syntax may break — fix by using `with { ProbeTimeoutSeconds = N }` or making it a property with default).
|
||||
|
||||
**Step 4:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx` — all existing tests still pass.
|
||||
|
||||
**Step 5:** Commit. `feat(drivers): expose ProbeTimeoutSeconds on every driver Options class`
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Shared section components
|
||||
|
||||
### Task 2.1: DriverFormShell.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 2.2, 2.3
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverFormShell.razor`
|
||||
|
||||
**Step 1:** Implement panel chrome (`<section class="panel rise">` + `<div class="panel-head">`) with `Title`, `ChildContent`, `Footer` render fragments. Cancel/Save/Delete buttons via parameters: `OnSave` `EventCallback`, `OnCancel` `EventCallback`, `OnDelete` `EventCallback?` (null hides delete). `Busy` bool drives spinner + disabled. Error banner from `Error` string param.
|
||||
|
||||
**Step 2:** Pattern-match the existing `DriverEdit.razor` save bar (lines 116–128) — same visual layout.
|
||||
|
||||
**Step 3:** No code-behind logic; pure presentation.
|
||||
|
||||
**Step 4:** `dotnet build src/Server/ZB.MOM.WW.OtOpcUa.AdminUI` — clean.
|
||||
|
||||
**Step 5:** Commit. `feat(adminui): add DriverFormShell shared component`
|
||||
|
||||
### Task 2.2: DriverIdentitySection.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 2.1, 2.3
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverIdentitySection.razor`
|
||||
|
||||
**Step 1:** Component renders the identity fields lifted from `DriverEdit.razor` lines 38–88: `DriverInstanceId` (read-only when not new), `Name`, `NamespaceId` (select from `Namespace[]` passed in), `Enabled`. Bind via `IdentityModel` record passed as `@bind-Value`.
|
||||
|
||||
**Step 2:** Define `IdentityModel` record in the same file's `@code` block: `public sealed record IdentityModel { ... }`. Properties match the existing `FormModel` Identity fields, with their `[Required]` / `[RegularExpression]` attributes preserved.
|
||||
|
||||
**Step 3:** Component takes `IsNew` bool, `Namespaces` list.
|
||||
|
||||
**Step 4:** Build clean.
|
||||
|
||||
**Step 5:** Commit. `feat(adminui): add DriverIdentitySection shared component`
|
||||
|
||||
### Task 2.3: DriverResilienceSection.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 2.1, 2.2
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverResilienceSection.razor`
|
||||
|
||||
**Step 1:** For this PR, keep the existing JSON textarea for Polly overrides — typed-form-ifying Polly is out of scope (Section 10 of the design says so implicitly). The component wraps the textarea + help text from `DriverEdit.razor` lines 101–109 in a panel.
|
||||
|
||||
**Step 2:** Bind `[Parameter] public string? ResilienceConfig { get; set; }` + `EventCallback<string?> ResilienceConfigChanged`.
|
||||
|
||||
**Step 3:** Build clean.
|
||||
|
||||
**Step 4:** Commit. `feat(adminui): add DriverResilienceSection shared component`
|
||||
|
||||
### Task 2.4: Wire the three new sections into existing DriverEdit.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (depends on 2.1–2.3)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`
|
||||
|
||||
**Step 1:** Replace lines 38–88 with `<DriverIdentitySection @bind-Value="_identityModel" Namespaces="_namespaces" IsNew="IsNew" />`. Wire `_identityModel` to/from `_form` in `OnInitializedAsync` and `SubmitAsync`.
|
||||
|
||||
**Step 2:** Replace lines 101–109 with `<DriverResilienceSection @bind-ResilienceConfig="_form.ResilienceConfig" />`.
|
||||
|
||||
**Step 3:** Wrap the form in `<DriverFormShell Busy="_busy" Error="_error" OnSave="SubmitAsync" OnCancel="@(...)" OnDelete="@(IsNew ? null : DeleteAsync)">`.
|
||||
|
||||
**Step 4:** Smoke test: `dotnet run --project src/Server/ZB.MOM.WW.OtOpcUa.Host` (admin role), open `/clusters/<existing>/drivers/<existing>`, page renders identically to before. (Driver config JSON textarea + identity fields + save bar visually unchanged.)
|
||||
|
||||
**Step 5:** Commit. `refactor(adminui): drive DriverEdit.razor through shared section components`
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Router + Type Picker
|
||||
|
||||
### Task 3.1: DriverTypePicker.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 3.2
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/Drivers/DriverTypePicker.razor`
|
||||
|
||||
**Step 1:** `@page "/clusters/{ClusterId}/drivers/new"` (this *replaces* the same route on the existing `DriverEdit.razor` — order matters; we'll yank the old route in Task 3.3).
|
||||
|
||||
**Step 2:** Renders a grid of 9 driver-type cards (`ModbusTcp`, `AbCip`, `AbLegacy`, `S7`, `TwinCat`, `FOCAS`, `OpcUaClient`, `Galaxy`, `Historian.Wonderware`). Each card is a `<a href="/clusters/@ClusterId/drivers/new/<type-slug>">` linking to the typed new-form route. Type slug = lowercase driver-type string (e.g. `modbustcp` → keep human-readable; map slug → DriverType enum-string in a static dictionary in this file).
|
||||
|
||||
**Step 3:** Card content: driver type name, one-line description, an icon (text symbol fine — `[M]`, `[7]`, `[OPC]`, etc., no new images this PR).
|
||||
|
||||
**Step 4:** `<ClusterNav ClusterId="@ClusterId" ActiveTab="drivers" />` for consistency with peer pages.
|
||||
|
||||
**Step 5:** Build clean. Commit. `feat(adminui): add DriverTypePicker landing page`
|
||||
|
||||
### Task 3.2: DriverEditRouter.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 3.1
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/Drivers/DriverEditRouter.razor`
|
||||
|
||||
**Step 1:** `@page "/clusters/{ClusterId}/drivers/{DriverInstanceId}"` — same edit route as today's `DriverEdit.razor` (will collide; resolve in Task 3.3).
|
||||
|
||||
**Step 2:** In `OnInitializedAsync`: load the `DriverInstance` row, read its `DriverType` string.
|
||||
|
||||
**Step 3:** Map `DriverType` → component type via a static dictionary literal `_componentMap`:
|
||||
```csharp
|
||||
private static readonly IReadOnlyDictionary<string, Type> _componentMap = new Dictionary<string, Type>(StringComparer.OrdinalIgnoreCase) {
|
||||
["ModbusTcp"] = typeof(ModbusDriverPage),
|
||||
["AbCip"] = typeof(AbCipDriverPage),
|
||||
["AbLegacy"] = typeof(AbLegacyDriverPage),
|
||||
["S7"] = typeof(S7DriverPage),
|
||||
["TwinCat"] = typeof(TwinCatDriverPage),
|
||||
["Focas"] = typeof(FocasDriverPage),
|
||||
["OpcUaClient"] = typeof(OpcUaClientDriverPage),
|
||||
["Galaxy"] = typeof(GalaxyDriverPage),
|
||||
["Historian.Wonderware"] = typeof(HistorianWonderwareDriverPage),
|
||||
};
|
||||
```
|
||||
|
||||
**Step 4:** Render `<DynamicComponent Type="_componentMap[_driverType]" Parameters="_params" />` where `_params = new Dictionary<string, object?> { ["ClusterId"] = ClusterId, ["DriverInstanceId"] = DriverInstanceId }`.
|
||||
|
||||
**Step 5:** Until the typed pages exist (Phase 4), the map is empty + this page falls back to a "not yet implemented for type X" notice. Keep route collision deferred until Task 3.3.
|
||||
|
||||
**Step 6:** Build clean. Commit. `feat(adminui): add DriverEditRouter dispatch page`
|
||||
|
||||
### Task 3.3: Resolve route collision — delete old new-route, keep old edit-route until Phase 5
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** none (depends on 3.1)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`
|
||||
|
||||
**Step 1:** Delete line 1 (`@page "/clusters/{ClusterId}/drivers/new"`). Keep line 2 (`@page "/clusters/{ClusterId}/drivers/{DriverInstanceId}"`). Until Task 3.4 unhooks it too, the old generic edit page still owns the edit route — `DriverEditRouter.razor` from Task 3.2 stays inert (build fine, but unreachable).
|
||||
|
||||
**Step 2:** Build clean.
|
||||
|
||||
**Step 3:** Smoke test: `/clusters/<id>/drivers/new` now hits `DriverTypePicker.razor`. `/clusters/<id>/drivers/<existing>` still hits `DriverEdit.razor`.
|
||||
|
||||
**Step 4:** Commit. `refactor(adminui): hand /drivers/new to DriverTypePicker`
|
||||
|
||||
### Task 3.4: Hand /drivers/{id} from DriverEdit.razor to DriverEditRouter.razor
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** none (depends on 3.3)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`
|
||||
|
||||
**Step 1:** Delete the remaining `@page` directive — file no longer routes. The router from Task 3.2 owns the route. The DriverEdit `.razor` file stays on disk as a referenceable component used as a fallback inside the router until all 9 typed pages land (Phase 4).
|
||||
|
||||
**Step 2:** Update `DriverEditRouter.razor` `_componentMap` so any driver type *not yet implemented* falls back to `typeof(DriverEdit)` — passing parameters identically. This keeps every existing driver row editable through whichever editor (typed or generic) is available at the time the row is opened.
|
||||
|
||||
**Step 3:** Build clean.
|
||||
|
||||
**Step 4:** Smoke test: open `/clusters/<id>/drivers/<existing-modbus-row>`. Router dispatches → falls back to `DriverEdit.razor` (since `ModbusDriverPage` doesn't exist yet) → page renders as before.
|
||||
|
||||
**Step 5:** Commit. `refactor(adminui): route /drivers/{id} through DriverEditRouter`
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Typed driver pages (one per driver)
|
||||
|
||||
**Pattern (apply to every task in this phase):**
|
||||
|
||||
Each `<Type>DriverPage.razor` is a self-contained page with:
|
||||
|
||||
1. Route(s):
|
||||
- `@page "/clusters/{ClusterId}/drivers/new/<type-slug>"` (new path)
|
||||
- The router (Task 3.2) dispatches the edit case — the page itself does NOT declare the edit route; it accepts `[Parameter] public string? DriverInstanceId { get; set; }` and the router passes it.
|
||||
2. Wraps everything in `<DriverFormShell>` (Task 2.1).
|
||||
3. Top: `<DriverIdentitySection>` (Task 2.2).
|
||||
4. Middle: a `<section class="panel">` per logical group of driver options. Each `<InputText>` / `<InputNumber>` / `<InputSelect>` is *explicitly* bound to a property on a form model (`<Type>FormModel`) inside `@code`. Field labels + help text come from the `[Display(Name, Description, GroupName)]` attributes on the `Options` class — but read via `ModelMetadata`, NOT via reflection at render time. (Implementation hint: use a static helper `static string Label<T>(Expression<Func<T,object?>> path)` that pops `[Display]` off at compile-time — simpler is to just hard-code the label in markup and treat `[Display]` as the redundant runtime hint for the API/validator. **Hard-coding labels is the chosen path — keep the page Razor explicit.**)
|
||||
5. Below: `<DriverTestConnectButton DriverType="<Type>" GetConfigJson="@BuildConfigJson" TimeoutSeconds="@_form.ProbeTimeoutSeconds" />` (real component lands in Phase 7; for Phase 4 ship a stub component that just disables the button and shows "Available after Phase 7" — defined in Phase 7 Task 7.5).
|
||||
6. Below: `<DriverStatusPanel DriverInstanceId="@DriverInstanceId" Enabled="@_form.Enabled" />` (only visible in edit mode, not new — stub lands in Phase 6).
|
||||
7. Below: `<DriverResilienceSection>` (Task 2.3).
|
||||
8. Tag picker: opens `<DriverTagPicker>` modal with the driver's picker body (Phase 9).
|
||||
9. Save path: serializes the typed form model to JSON via `JsonSerializer.Serialize(_form.Config, _jsonOpts)`, normalizes (same as today's `NormalizeJson`), upserts the `DriverInstance` row with `RowVersion` opt-concurrency. Match the existing save flow in `DriverEdit.razor:187-257` line-for-line for the upsert mechanics.
|
||||
10. Load path: if `DriverInstanceId != null`, load row, `JsonSerializer.Deserialize<<Type>Options>(row.DriverConfig, _jsonOpts)` where `_jsonOpts = new() { UnmappedMemberHandling = JsonUnmappedMemberHandling.Skip }`. Wrap into the form model.
|
||||
11. After save, navigate to `/clusters/{ClusterId}/drivers` (the list page — match existing behavior).
|
||||
|
||||
**Per-driver task acceptance:**
|
||||
- Page compiles, routes resolve.
|
||||
- For a new instance: typed form save round-trips to DB; row's `DriverType` is set; `DriverConfig` JSON contains every field shown in the form.
|
||||
- For an edit: page loads existing row; every field populates; save preserves all fields.
|
||||
- Update `DriverEditRouter.razor` `_componentMap` to point this driver type at the new page.
|
||||
- Update `ClusterDrivers.razor` (the list page) — no change needed; it already links via the unified edit route.
|
||||
- Add a unit test in `tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/<Type>DriverPageFormSerializationTests.cs` round-tripping `<Type>Options` ↔ JSON ↔ form-model ↔ `<Type>Options`. Use Shouldly.
|
||||
|
||||
### Task 4.1: ModbusDriverPage.razor
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** 4.2 – 4.9
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/Drivers/ModbusDriverPage.razor`
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/ModbusDriverPageFormSerializationTests.cs`
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/Drivers/DriverEditRouter.razor` (`_componentMap`)
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/ZB.MOM.WW.OtOpcUa.AdminUI.csproj` (add `ProjectReference` to `Driver.Modbus.Contracts`)
|
||||
|
||||
Follow Phase 4 pattern. Modbus is the largest options class (289 lines); group into panels: **Transport** (endpoint, port, unit ID), **Polling** (interval, batch sizes), **Probe** (probe options + `ProbeTimeoutSeconds`), **Tuning** (timeouts, retries). Read `ModbusDriverOptions.cs` first to enumerate every property.
|
||||
|
||||
### Task 4.2: AbCipDriverPage.razor
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** 4.1, 4.3 – 4.9
|
||||
|
||||
**Files:** as 4.1, swap `Modbus` → `AbCip`. Add `ProjectReference` to `Driver.AbCip.Contracts`. Read `AbCipDriverOptions.cs` to enumerate fields. PLC family selector (CompactLogix / ControlLogix) is a key field.
|
||||
|
||||
### Task 4.3: AbLegacyDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1, 4.2, 4.4–4.9
|
||||
**Files:** as 4.1 for AbLegacy. Read `AbLegacyDriverOptions.cs`.
|
||||
|
||||
### Task 4.4: S7DriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.3, 4.5–4.9
|
||||
**Files:** as 4.1 for S7. CPU/rack/slot tuple in a Connection panel.
|
||||
|
||||
### Task 4.5: TwinCatDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.4, 4.6–4.9
|
||||
**Files:** as 4.1 for TwinCAT. AMS NetId + port in a Connection panel.
|
||||
|
||||
### Task 4.6: FocasDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.5, 4.7–4.9
|
||||
**Files:** as 4.1 for FOCAS. CNC series + connection params.
|
||||
|
||||
### Task 4.7: OpcUaClientDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.6, 4.8, 4.9
|
||||
**Files:** as 4.1 for OpcUaClient. **Security profile (None / Basic256Sha256-Sign / Basic256Sha256-SignAndEncrypt)** is a dropdown sourced from the same enum the OPC UA Server uses (cross-ref `docs/security.md`). Username/password are `[DataType(DataType.Password)]`.
|
||||
|
||||
### Task 4.8: GalaxyDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.7, 4.9
|
||||
**Files:** as 4.1 for Galaxy. Two panels: **mxaccessgw** (gateway endpoint, API key — password input), **Galaxy** (ClientName, SQL config db connection if exposed in options). API key field is `[DataType(DataType.Password)]`.
|
||||
|
||||
### Task 4.9: HistorianWonderwareDriverPage.razor
|
||||
**Classification:** standard · **Estimated implement time:** ~5 min · **Parallelizable with:** 4.1–4.8
|
||||
**Files:** as 4.1 for the Wonderware Historian (the page covers the *driver*'s view of historian client options — the `WonderwareHistorianClientOptions` lives in the `.Client.Contracts` project from Task 1.9). The driver type-string in DriverInstance is `Historian.Wonderware`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Delete the generic DriverEdit.razor
|
||||
|
||||
### Task 5.1: Remove DriverEdit.razor + fallback in router
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (depends on all 4.x tasks)
|
||||
|
||||
**Files:**
|
||||
- Delete: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/Drivers/DriverEditRouter.razor` (remove fallback)
|
||||
|
||||
**Step 1:** Confirm `_componentMap` in the router has all 9 driver types. Delete the "fallback to DriverEdit" branch.
|
||||
|
||||
**Step 2:** `git rm src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Clusters/DriverEdit.razor`.
|
||||
|
||||
**Step 3:** Build clean. `dotnet test ZB.MOM.WW.OtOpcUa.slnx` — clean.
|
||||
|
||||
**Step 4:** Smoke test all 9 driver types: open the list page, open one existing row of each type, verify the typed page renders. (For types without existing rows on dev DB, create one via the type picker first.)
|
||||
|
||||
**Step 5:** Commit. `refactor(adminui): retire generic DriverEdit.razon (typed pages cover all 9 drivers)`
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Live status panel
|
||||
|
||||
### Task 6.1: DriverHealthChanged DPS message
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 6.2 (different folders)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Drivers/DriverHealthChanged.cs`
|
||||
|
||||
**Step 1:** Define record `public sealed record DriverHealthChanged(string ClusterId, string DriverInstanceId, string State, DateTime? LastSuccessUtc, string? LastError, int ErrorCount5Min, DateTime PublishedUtc);` — `State` is the `DriverState` enum string (matches `Healthy` / `Connecting` / `Faulted` / `Unknown` used by `DriverHealth`).
|
||||
|
||||
**Step 2:** Add `[MemoryPackable]` if peer messages in this folder use MemoryPack — match the folder's existing pattern. (Look at `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Fleet/FleetStatusChanged.cs` for the canonical pattern.)
|
||||
|
||||
**Step 3:** Build clean. Commit. `feat(messages): add DriverHealthChanged DPS contract`
|
||||
|
||||
### Task 6.2: Publish DriverHealthChanged from each driver actor
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min (per driver; bundle = ~15 min — **split this into 6.2a–6.2d if implementer balks**)
|
||||
**Parallelizable with:** none (touches every driver actor)
|
||||
|
||||
**Files:**
|
||||
- Modify: each `<Type>Driver.cs` — find the place that updates `_health` (e.g. `FocasDriver.cs:565+`, `ModbusDriver.cs:1180+`). After every `Volatile.Write(ref _health, ...)`, also publish to DPS topic `driver-health-{ClusterId}` via the injected `DistributedPubSub.Get(_actorSystem).Mediator`.
|
||||
- Find pattern: `Volatile.Write(ref _health,` — every occurrence in `src/Drivers/**/*.cs` not under obj/bin.
|
||||
|
||||
**Step 1:** Inject the publish callback into each driver. The cleanest hook is `IDriverHealthPublisher` (new interface in `Core.Abstractions`), with the Akka-backed impl living in `Runtime` and DI-registered there. Driver constructors take `IDriverHealthPublisher` (nullable for backward compat in tests).
|
||||
|
||||
**Step 2:** After each `_health` write, call `_healthPublisher?.Publish(new DriverHealthChanged(...))`. Pull `ClusterId` + `DriverInstanceId` from the driver's existing identity (every driver already knows its instance ID for telemetry tags).
|
||||
|
||||
**Step 3:** Add `ErrorCount5Min` tracking: a sliding-window counter on the driver — bump on every transition into `Faulted`, decay over 5min. Simple impl: a `Queue<DateTime>` guarded by lock; on read, dequeue entries older than 5min and return `.Count`.
|
||||
|
||||
**Step 4:** `dotnet build` clean. `dotnet test` clean. Driver unit tests may need a no-op `IDriverHealthPublisher` (provide one in Core.Abstractions: `public sealed class NullDriverHealthPublisher : IDriverHealthPublisher { public void Publish(DriverHealthChanged _) { } }`).
|
||||
|
||||
**Step 5:** Commit. `feat(drivers): publish DriverHealthChanged to DPS on every health transition`
|
||||
|
||||
### Task 6.3: DriverStatusHub
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 6.4
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/DriverStatusHub.cs`
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/HubServiceCollectionExtensions.cs` (register hub)
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/HubRouteBuilderExtensions.cs` (`MapHub<DriverStatusHub>("/hubs/driverstatus")`)
|
||||
|
||||
**Step 1:** Hub with one method: `Task JoinDriver(string driverInstanceId)` — adds the connection to a group named `driver:{driverInstanceId}`, immediately invokes `Clients.Caller.SendAsync("status", currentSnapshot)`. Inject `IDriverStatusSnapshotStore` (new — Task 6.4) to read the current snapshot.
|
||||
|
||||
**Step 2:** Hub method-name constant: `public const string MethodName = "status";`.
|
||||
|
||||
**Step 3:** `[Microsoft.AspNetCore.Authorization.Authorize]` on the class (same auth as the existing AdminUI hubs).
|
||||
|
||||
**Step 4:** Build clean. Commit. `feat(adminui): add DriverStatusHub`
|
||||
|
||||
### Task 6.4: DriverStatusSignalRBridge + snapshot store
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 6.3
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/DriverStatusSignalRBridge.cs`
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/IDriverStatusSnapshotStore.cs`
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/InMemoryDriverStatusSnapshotStore.cs`
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Hubs/HubServiceCollectionExtensions.cs` (spawn bridge actor on admin-role startup; DI-register snapshot store as singleton)
|
||||
|
||||
**Step 1:** Bridge = Akka `ReceiveActor`. `PreStart` subscribes to DPS topic `driver-health-*` (or one topic + per-cluster filter — pick the same wildcard convention `FleetStatusSignalRBridge` uses). On `DriverHealthChanged msg`: writes to `IDriverStatusSnapshotStore` (latest-snapshot-wins, keyed by instance ID), then `_hub.Clients.Group($"driver:{msg.DriverInstanceId}").SendAsync(DriverStatusHub.MethodName, msg)`.
|
||||
|
||||
**Step 2:** `InMemoryDriverStatusSnapshotStore`: `ConcurrentDictionary<string, DriverHealthChanged> _byInstance;` — `Upsert(msg)` and `TryGet(instanceId, out msg)`.
|
||||
|
||||
**Step 3:** Wire bridge in `AddOtOpcUaSignalRBridges` (or equivalent). Singleton snapshot store. Build clean.
|
||||
|
||||
**Step 4:** Commit. `feat(adminui): add DriverStatusSignalRBridge + snapshot store`
|
||||
|
||||
### Task 6.5: DriverStatusPanel.razor
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (consumes 6.3 + 6.4)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverStatusPanel.razor`
|
||||
|
||||
**Step 1:** Parameters: `DriverInstanceId`, `Enabled`. Inject `NavigationManager` for hub URL building, `IServiceProvider` for hub-connection auth.
|
||||
|
||||
**Step 2:** `OnInitializedAsync`: if `Enabled == false`, render "Disabled — not deployed" + skip the hub. Else build a `HubConnection` (`HubConnectionBuilder` → `WithUrl(Nav.ToAbsoluteUri("/hubs/driverstatus"))` → `WithAutomaticReconnect()` → `Build()`), register `On<DriverHealthChanged>("status", ...)`, `StartAsync`, then `InvokeAsync("JoinDriver", DriverInstanceId)`. The handler updates `_snapshot` + `StateHasChanged`.
|
||||
|
||||
**Step 3:** Render: state chip (color-mapped: `Healthy` green, `Connecting` yellow, `Faulted` red, `Unknown` gray) + "last success {humanized timestamp}" + `ErrorCount5Min` badge + collapsible "last error" panel showing `LastError` if set. Visual reuses existing `chip` / `panel` styles from sibling pages.
|
||||
|
||||
**Step 4:** `_lastSnapshotAt = DateTime.UtcNow` on each push; if `(now - _lastSnapshotAt) > 30s` (timer-driven re-render every 5s), add `dim` class to the whole panel.
|
||||
|
||||
**Step 5:** `DisposeAsync`: `await _hub.DisposeAsync()`. Implement `IAsyncDisposable`.
|
||||
|
||||
**Step 6:** Wire into all 9 driver pages. In edit mode (i.e. `DriverInstanceId != null`), render `<DriverStatusPanel DriverInstanceId="@DriverInstanceId" Enabled="@_form.Identity.Enabled" />` above the resilience section.
|
||||
|
||||
**Step 7:** Build clean. Smoke test: bring up `lmxopcua-fix up modbus`, deploy a Modbus driver pointing at the sim, observe `Healthy` push within seconds. Stop the sim, observe `Faulted` push within the driver's poll interval. Commit. `feat(adminui): live driver status panel on every driver page`
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — Test Connect
|
||||
|
||||
### Task 7.1: IDriverProbe interface + TestDriverConnect message
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 7.2
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverProbe.cs`
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Admin/TestDriverConnect.cs`
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Admin/TestDriverConnectResult.cs`
|
||||
|
||||
**Step 1:** `IDriverProbe`:
|
||||
```csharp
|
||||
public interface IDriverProbe
|
||||
{
|
||||
/// <summary>Driver-type string this probe handles (matches DriverInstance.DriverType).</summary>
|
||||
string DriverType { get; }
|
||||
/// <summary>Run a connection probe. Never mutates; never writes.</summary>
|
||||
Task<DriverProbeResult> ProbeAsync(string configJson, TimeSpan timeout, CancellationToken ct);
|
||||
}
|
||||
public sealed record DriverProbeResult(bool Ok, string? Message, TimeSpan? Latency);
|
||||
```
|
||||
|
||||
**Step 2:** `TestDriverConnect(string DriverType, string ConfigJson, int TimeoutSeconds, Guid CorrelationId)` and `TestDriverConnectResult(bool Ok, string? Message, double? LatencyMs, Guid CorrelationId)`. Match the MemoryPack conventions of peer messages in `Messages/Admin/`.
|
||||
|
||||
**Step 3:** Build clean. Commit. `feat(messages,abstractions): add IDriverProbe + TestDriverConnect contract`
|
||||
|
||||
### Task 7.2: AdminOperationsActor handler for TestDriverConnect
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 7.1 (separate file; modify late)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs`
|
||||
|
||||
**Step 1:** Add ctor param `IEnumerable<IDriverProbe> probes`. Build `_probesByType = probes.ToDictionary(p => p.DriverType, StringComparer.OrdinalIgnoreCase)` in ctor. Update `Props` factory accordingly.
|
||||
|
||||
**Step 2:** `ReceiveAsync<TestDriverConnect>(HandleTestDriverConnectAsync)`. Handler:
|
||||
```csharp
|
||||
private async Task HandleTestDriverConnectAsync(TestDriverConnect msg)
|
||||
{
|
||||
var replyTo = Sender;
|
||||
if (!_probesByType.TryGetValue(msg.DriverType, out var probe))
|
||||
{
|
||||
replyTo.Tell(new TestDriverConnectResult(false, $"No probe registered for driver type '{msg.DriverType}'", null, msg.CorrelationId));
|
||||
return;
|
||||
}
|
||||
var clampedSec = Math.Clamp(msg.TimeoutSeconds, 1, 60);
|
||||
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(clampedSec));
|
||||
try
|
||||
{
|
||||
var sw = Stopwatch.StartNew();
|
||||
var result = await probe.ProbeAsync(msg.ConfigJson, TimeSpan.FromSeconds(clampedSec), cts.Token);
|
||||
replyTo.Tell(new TestDriverConnectResult(result.Ok, result.Message, sw.Elapsed.TotalMilliseconds, msg.CorrelationId));
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
replyTo.Tell(new TestDriverConnectResult(false, $"Probe timed out after {clampedSec}s", null, msg.CorrelationId));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_log.Error(ex, "Probe for {DriverType} threw", msg.DriverType);
|
||||
replyTo.Tell(new TestDriverConnectResult(false, ex.Message, null, msg.CorrelationId));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 3:** Update wherever `AdminOperationsActor.Props(...)` is called (search the repo) to pass the new `probes` enumerable. Likely in `Runtime` DI registration — register all `IDriverProbe` impls then resolve `IEnumerable<IDriverProbe>` for the singleton.
|
||||
|
||||
**Step 4:** Build clean. Commit. `feat(adminops): handle TestDriverConnect via per-driver IDriverProbe`
|
||||
|
||||
### Task 7.3: TCP-only probe impls (Modbus, AbCip, AbLegacy, S7)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 7.4
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverProbe.cs`
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriverProbe.cs`
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriverProbe.cs`
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.S7/S7DriverProbe.cs`
|
||||
|
||||
**Step 1:** Each impl deserializes its own Options class from the configJson, extracts `Endpoint` (host:port — exact field name depends on the Options class), opens a TCP `Socket` with `ConnectAsync(host, port, ct)`, closes immediately. On success: `(true, null, sw.Elapsed)`. On `SocketException`: `(false, ex.SocketErrorCode.ToString(), null)`.
|
||||
|
||||
**Step 2:** Register each as `services.AddSingleton<IDriverProbe, ModbusDriverProbe>()` (and peers) in the driver's existing `*FactoryExtensions.cs` `Add*` method.
|
||||
|
||||
**Step 3:** Build clean. Commit. `feat(drivers): TCP-only probes for Modbus, AbCip, AbLegacy, S7`
|
||||
|
||||
### Task 7.4: Specialty probes (FOCAS, TwinCAT, OpcUaClient, Galaxy, Historian.Wonderware)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min (per driver; bundle ~15 min — **may need to split into 7.4a–7.4e**)
|
||||
**Parallelizable with:** Task 7.3
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS/FocasDriverProbe.cs` — calls FOCAS `cnc_allclibhndl3` connect + immediate `cnc_freelibhndl`. Reuses the existing `IFocasClient.Connect`.
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriverProbe.cs` — opens an `AmsAddress` and sends an ADS Read State request.
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriverProbe.cs` — opens an `OPCFoundation.NetStandard.Opc.Ua.Client.Session` against the configured endpoint with the configured security profile, immediately closes.
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Health/GalaxyDriverProbe.cs` — sends `MxCommand.Ping` to mxaccessgw via the existing gRPC client. (Build on the existing `Health/` folder.)
|
||||
- Create: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/HistorianWonderwareDriverProbe.cs` — TCP probe to historian endpoint (historian client uses an MX-style transport; cheap path is a TCP connect to the historian's IPC port + close).
|
||||
|
||||
**Step 1:** Each impl registers in its driver's `Add*` extension.
|
||||
|
||||
**Step 2:** Build clean. `dotnet test` clean. Commit. `feat(drivers): specialty Test Connect probes for FOCAS/TwinCAT/OPCUA/Galaxy/Historian`
|
||||
|
||||
### Task 7.5: DriverTestConnectButton.razor
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (consumes 7.2 + 7.3 + 7.4)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverTestConnectButton.razor`
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Clients/AdminProbeService.cs`
|
||||
|
||||
**Step 1:** `AdminProbeService` — thin wrapper around `IAdminOperationsClient`. Method `Task<TestDriverConnectResult> TestAsync(string driverType, string configJson, int timeoutSeconds, CancellationToken ct)`. Builds the message + Asks + applies a `Task.WhenAny` 65s timeout wall as outer guard. DI-register as scoped.
|
||||
|
||||
**Step 2:** Button component params: `DriverType`, `GetConfigJson` (`Func<string>`), `TimeoutSeconds` (`int`). Renders `<button class="btn btn-outline-primary btn-sm">Test Connect</button>` + inline result chip (green tick + latency, or red x + message). Spinner during in-flight. Auto-clears chip after 30s.
|
||||
|
||||
**Step 3:** On click: invokes `AdminProbeService.TestAsync(DriverType, GetConfigJson(), TimeoutSeconds, ct)` with `CancellationToken.None` (the actor-side timeout already bounds it).
|
||||
|
||||
**Step 4:** Wire into all 9 driver pages by replacing the Phase 4 stub.
|
||||
|
||||
**Step 5:** Build clean. Smoke test: open `/clusters/<id>/drivers/new/modbustcp`, type sim endpoint into form, click Test Connect → green. Wrong port → red within 5s. Black-holed IP → "Probe timed out after 5s".
|
||||
|
||||
**Step 6:** Commit. `feat(adminui): Test Connect button on every driver page`
|
||||
|
||||
---
|
||||
|
||||
## Phase 8 — Reconnect / Restart
|
||||
|
||||
### Task 8.1: RestartDriver + ReconnectDriver messages + AdminOperationsActor handlers
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 8.2 (separate files)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Admin/RestartDriver.cs`
|
||||
- Create: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Messages/Admin/ReconnectDriver.cs`
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs`
|
||||
|
||||
**Step 1:** Messages: `RestartDriver(string ClusterId, string DriverInstanceId, string ActorByUserName, Guid CorrelationId)` + `RestartDriverResult(bool Ok, string? Message, Guid CorrelationId)`. Same shape for `Reconnect*`.
|
||||
|
||||
**Step 2:** Handlers in the actor. They locate the running driver actor (the existing `DriverHostActor` hierarchy already addresses driver actors by instance ID — find the existing lookup mechanism in `DriverHostActor.cs` / `Runtime` and reuse it). Reconnect = Tell the driver actor a `Reconnect` internal command; Restart = Tell its supervisor to stop+restart the child.
|
||||
|
||||
**Step 3:** Audit-log every call via the existing `ConfigEdits` mechanism — entity type `DriverInstance`, fields `{op: restart|reconnect}`.
|
||||
|
||||
**Step 4:** Build clean. Commit. `feat(adminops): Restart/Reconnect driver operations`
|
||||
|
||||
### Task 8.2: DriverOperator authorization policy
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 8.1
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Security/` — add a `DriverOperator` policy alongside the existing `WriteOperate` / `WriteTune` policies. Map to LDAP group `ot-driver-operator` (or document the chosen group name in `docs/Security.md`).
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/` — register the policy with `AddAuthorizationBuilder().AddPolicy("DriverOperator", p => p.RequireRole("ot-driver-operator"))` (or whatever pattern the existing AdminUI policies use).
|
||||
- Modify: `docs/Security.md` — add a row to the role/policy table.
|
||||
|
||||
**Step 1:** Mirror the shape of the most-similar existing policy (probably `WriteOperate`).
|
||||
|
||||
**Step 2:** Build clean. Commit. `feat(security): add DriverOperator authorization policy`
|
||||
|
||||
### Task 8.3: Wire Reconnect/Restart buttons into DriverStatusPanel
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (depends on 8.1 + 8.2 + 6.5)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverStatusPanel.razor`
|
||||
|
||||
**Step 1:** Inject `IAuthorizationService`. In `OnInitializedAsync`, check `AuthorizeAsync(user, null, "DriverOperator")` → set `_canOperate` bool. Render buttons only when `_canOperate && Enabled`.
|
||||
|
||||
**Step 2:** Two buttons:
|
||||
- **Reconnect** — no confirm. Click: spinner on button, set inline "Reconnecting…" chip, invoke `AdminOperationsClient.AskAsync<ReconnectDriverResult>(new ReconnectDriver(...))`. Result chip clears once next `DriverHealthChanged` push arrives.
|
||||
- **Restart** — confirm dialog "Restart driver `<id>`? This briefly interrupts subscriptions." Same flow otherwise.
|
||||
|
||||
**Step 3:** Both buttons disabled (greyed-out) during in-flight ops or during a Test Connect on the same page (publish a simple page-scoped `bool _busyAnything` via the parent driver page → flows to the panel via a parameter).
|
||||
|
||||
**Step 4:** Build clean. Smoke test: Reconnect → see `Connecting → Healthy` transition push in the panel. Restart → confirm → see actor restart. Commit. `feat(adminui): Reconnect/Restart on DriverStatusPanel (DriverOperator-gated)`
|
||||
|
||||
---
|
||||
|
||||
## Phase 9 — Static address pickers
|
||||
|
||||
### Task 9.1: DriverTagPicker.razor modal shell
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** 9.2–9.10
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/DriverTagPicker.razor`
|
||||
|
||||
**Step 1:** Modal shell. Params: `Visible` (`bool`), `OnClose` (`EventCallback`), `Title` (string, e.g. "Modbus address"), `ChildContent` (`RenderFragment`), `OnPickAddress` (`EventCallback<string>`). Renders a Bootstrap-style `.modal.show` (no JS interop — Razor-managed visibility class). The child fragment is the per-driver picker body.
|
||||
|
||||
**Step 2:** Includes a search box + "Use this address" button at the bottom; "Use" calls `OnPickAddress` with the value currently bound in the child.
|
||||
|
||||
**Step 3:** Build clean. Commit. `feat(adminui): DriverTagPicker modal shell`
|
||||
|
||||
### Task 9.2 – 9.10: Per-driver static picker bodies (9 tasks)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min each
|
||||
**Parallelizable with:** each other (different files)
|
||||
|
||||
For each driver, create `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Shared/Drivers/Pickers/<Type>AddressPickerBody.razor`. Per Section 6.2 of the design:
|
||||
|
||||
| Task | Driver | Picker body |
|
||||
|---|---|---|
|
||||
| 9.2 | Modbus | Register-type dropdown (Coil/DiscreteInput/Holding/Input) + offset spinner + length → renders `4x00001-4`. |
|
||||
| 9.3 | AbCip | Tag name + element index; CompactLogix/ControlLogix hint from form. |
|
||||
| 9.4 | AbLegacy | File type (N/B/F/I/O/S/T/C/R) + file number + element. |
|
||||
| 9.5 | S7 | Area (DB/M/I/Q) + db-number + offset + S7 type → `DB10.DBD20:REAL`. |
|
||||
| 9.6 | TwinCat | Free-text ADS variable name + format hint. |
|
||||
| 9.7 | FOCAS | Parameter group dropdown + parameter ID; drives the FOCAS function-code lookup table. |
|
||||
| 9.8 | OpcUaClient | Free-text NodeId field. (Live browse deferred — Section 10.) |
|
||||
| 9.9 | Galaxy | Free-text `tag_name.AttributeName` field. (Live browse deferred.) |
|
||||
| 9.10 | Historian.Wonderware | Tag name + retrieval mode + interval. |
|
||||
|
||||
Each task:
|
||||
|
||||
**Step 1:** Create the per-driver picker body component.
|
||||
|
||||
**Step 2:** Add a small unit test in `tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/<Type>AddressBuilderTests.cs` — for the static address builders that compute a string (Modbus, S7, FOCAS), assert input → string output. For free-text bodies (OpcUaClient, Galaxy), test pass-through.
|
||||
|
||||
**Step 3:** Wire into the matching `*DriverPage.razor` — add a "Pick address" button that toggles `<DriverTagPicker>` open with this body as its child.
|
||||
|
||||
**Step 4:** Build clean. Commit per task. `feat(adminui): <Type> address picker`
|
||||
|
||||
---
|
||||
|
||||
## Phase 10 — End-to-end verification
|
||||
|
||||
### Task 10.1: DriverTestConnectE2eTests
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** 10.2, 10.3
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/DriverTestConnectE2eTests.cs`
|
||||
|
||||
**Step 1:** Use the existing Docker fixture pattern from peer tests in this project. Three test methods: Modbus, AbCip, S7 — each starts the corresponding `lmxopcua-fix up <driver>` fixture (the test project's fixture base class handles it) + asserts green probe vs sim, red probe vs wrong port, timeout vs `1.2.3.4:502` (black-holed).
|
||||
|
||||
**Step 2:** `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests --filter DriverTestConnectE2eTests` — passes.
|
||||
|
||||
**Step 3:** Commit. `test(adminui): E2E Test Connect probes against Docker sims`
|
||||
|
||||
### Task 10.2: DriverReconnectE2eTests
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** 10.1, 10.3
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/DriverReconnectE2eTests.cs`
|
||||
|
||||
**Step 1:** Start a Modbus driver against the sim, observe `Healthy`, dispatch `ReconnectDriver` via the in-cluster admin ops client, assert `Connecting → Healthy` transitions within 5s.
|
||||
|
||||
**Step 2:** Build + run. Commit. `test(adminui): E2E Reconnect operation`
|
||||
|
||||
### Task 10.3: DriverStatusHubE2eTests
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** 10.1, 10.2
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/DriverStatusHubE2eTests.cs`
|
||||
|
||||
**Step 1:** Open a SignalR connection to `/hubs/driverstatus`, invoke `JoinDriver`, force a `DriverHealthChanged` via test seam (publish directly to the DPS topic), assert push received within 1s.
|
||||
|
||||
**Step 2:** Build + run. Commit. `test(adminui): E2E DriverStatusHub push`
|
||||
|
||||
### Task 10.4: Manual smoke checklist (documented, not automated)
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/plans/2026-05-28-adminui-driver-pages-design.md` — replace Section 8.3 stub with the actual checklist as run, with timestamps.
|
||||
|
||||
**Step 1:** Run the checklist (Section 8.3 of the design). Tick each item.
|
||||
|
||||
**Step 2:** Commit. `docs(plans): record AdminUI driver pages smoke-test results`
|
||||
|
||||
---
|
||||
|
||||
## Out-of-scope (documented follow-ups)
|
||||
|
||||
These are NOT part of this plan. Capture as separate work items after merge:
|
||||
|
||||
- Live OPC UA browse in OpcUaClient picker.
|
||||
- Live Galaxy hierarchy browse in Galaxy picker.
|
||||
- Historian.Wonderware tag list pulled from the historian store.
|
||||
- DriverStatusPanel history graphs + per-tag diagnostics.
|
||||
- Per-driver bespoke controls beyond Reconnect/Restart.
|
||||
- Polly resilience config typed-form (still a JSON textarea this PR).
|
||||
|
||||
---
|
||||
|
||||
## Cross-cutting verification (run before final PR)
|
||||
|
||||
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — clean.
|
||||
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx` — clean.
|
||||
3. `lmxopcua-fix up modbus`, then run the manual smoke (10.4).
|
||||
4. Review `git diff --stat master..` — confirm scope matches plan (no surprise file changes).
|
||||
5. Confirm `OtOpcUa-docs-issues.md` shows no new XML-doc warnings introduced by the new code (run `commentchecker-aot` on the AdminUI + Drivers/* trees).
|
||||
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-28-adminui-driver-pages-plan.md",
|
||||
"designPath": "docs/plans/2026-05-28-adminui-driver-pages-design.md",
|
||||
"tasks": [
|
||||
{"id": "0.1", "subject": "Create AdminUI test project + slnx entry + placeholder test", "status": "completed", "commit": "dc12c37"},
|
||||
|
||||
{"id": "1.1", "subject": "Driver.Modbus.Contracts — extract ModbusDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "5058a56", "notes": "Has 1 ProjectReference to Modbus.Addressing (sibling zero-dep enum project) — design intent preserved."},
|
||||
{"id": "1.2", "subject": "Driver.AbCip.Contracts — extract AbCipDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "b474d63", "notes": "AbCipDataType enum moved with Options; extensions split into runtime."},
|
||||
{"id": "1.3", "subject": "Driver.AbLegacy.Contracts — extract AbLegacyDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "4902295", "notes": "AbLegacyDataType + AbLegacyPlcFamilyProfile also moved; extensions split."},
|
||||
{"id": "1.4", "subject": "Driver.S7.Contracts — extract S7DriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "9f62f2c", "notes": "Parallel S7CpuType enum (7 values) + S7CpuTypeMap in runtime; S7.Cli + 2 tests fixed for type change."},
|
||||
{"id": "1.5", "subject": "Driver.TwinCAT.Contracts — extract TwinCATDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "a88721c", "notes": "TwinCATDataType enum moved; extensions split."},
|
||||
{"id": "1.6", "subject": "Driver.FOCAS.Contracts — extract FocasDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "d892ab9", "notes": "FocasCncSeries + FocasDataType enums moved; extensions split."},
|
||||
{"id": "1.7", "subject": "Driver.OpcUaClient.Contracts — extract OpcUaClientDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "5f0e048", "notes": "All 4 enums self-contained in options file; no NuGet types leaked."},
|
||||
{"id": "1.8", "subject": "Driver.Galaxy.Contracts — extract GalaxyDriverOptions", "status": "completed", "blockedBy": ["0.1"], "commit": "5ffbc42", "notes": "Moved from Config/ subdir to contracts root; namespace preserved."},
|
||||
{"id": "1.9", "subject": "Driver.Historian.Wonderware.Client.Contracts — extract options", "status": "completed", "blockedBy": ["0.1"], "commit": "8c0a320", "notes": "Pure record, primitives only."},
|
||||
{"id": "1.10", "subject": "Add ProbeTimeoutSeconds to all 9 Options classes + slnx validation", "status": "completed", "blockedBy": ["1.1","1.2","1.3","1.4","1.5","1.6","1.7","1.8","1.9"], "commit": "f2f6eeb"},
|
||||
|
||||
{"id": "2.1", "subject": "DriverFormShell.razor", "status": "completed", "blockedBy": ["0.1"], "commit": "85af126"},
|
||||
{"id": "2.2", "subject": "DriverIdentitySection.razor", "status": "completed", "blockedBy": ["0.1"], "commit": "1ff3875", "notes": "Bonus ValidationMessage tags added."},
|
||||
{"id": "2.3", "subject": "DriverResilienceSection.razor", "status": "completed", "blockedBy": ["0.1"], "commit": "a008530"},
|
||||
{"id": "2.4", "subject": "Wire shared sections into existing DriverEdit.razor", "status": "completed", "blockedBy": ["2.1","2.2","2.3"], "commit": "a28f4cd", "notes": "Net -74 lines; zero functional regression."},
|
||||
|
||||
{"id": "3.1", "subject": "DriverTypePicker.razor (route: /drivers/new)", "status": "completed", "blockedBy": ["2.4"], "commit": "c0ce5d0"},
|
||||
{"id": "3.2", "subject": "DriverEditRouter.razor with DynamicComponent dispatch","status": "completed", "blockedBy": ["2.4"], "commit": "55e8bf7"},
|
||||
{"id": "3.3", "subject": "Hand /drivers/new from DriverEdit to DriverTypePicker","status": "completed", "blockedBy": ["3.1"], "commit": "27b3a01", "notes": "Bundled with 3.4 — single commit removed both @page directives."},
|
||||
{"id": "3.4", "subject": "Hand /drivers/{id} from DriverEdit to DriverEditRouter (fallback to DriverEdit)", "status": "completed", "blockedBy": ["3.2","3.3"], "commit": "27b3a01"},
|
||||
|
||||
{"id": "4.0", "subject": "AdminUI csproj references all 9 Driver.*.Contracts", "status": "completed", "blockedBy": ["1.10","3.4"], "commit": "7014c93", "notes": "Inserted as a precondition for parallel 4.1-4.9 implementation."},
|
||||
{"id": "4.1", "subject": "ModbusDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "a3073d1"},
|
||||
{"id": "4.2", "subject": "AbCipDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "dc21cba"},
|
||||
{"id": "4.3", "subject": "AbLegacyDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "059a621"},
|
||||
{"id": "4.4", "subject": "S7DriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "5cad9b2"},
|
||||
{"id": "4.5", "subject": "TwinCatDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "dfbf679"},
|
||||
{"id": "4.6", "subject": "FocasDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "8149739"},
|
||||
{"id": "4.7", "subject": "OpcUaClientDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "efcc231"},
|
||||
{"id": "4.8", "subject": "GalaxyDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "a243cfd"},
|
||||
{"id": "4.9", "subject": "HistorianWonderwareDriverPage.razor + serialization test", "status": "completed", "blockedBy": ["4.0"], "commit": "2c16062"},
|
||||
{"id": "4.10","subject": "Wire all 9 typed pages into DriverEditRouter._componentMap", "status": "completed", "blockedBy": ["4.1","4.2","4.3","4.4","4.5","4.6","4.7","4.8","4.9"], "commit": "5f8fa70"},
|
||||
{"id": "4.11","subject": "Fixup: S7 Tags data-loss + missing FormModel tests (post-review)", "status": "completed", "blockedBy": ["4.10"], "commit": "c4086c2"},
|
||||
|
||||
{"id": "5.1", "subject": "Delete DriverEdit.razor + remove fallback in DriverEditRouter", "status": "completed", "blockedBy": ["4.1","4.2","4.3","4.4","4.5","4.6","4.7","4.8","4.9"], "commit": "a971db3"},
|
||||
|
||||
{"id": "6.1", "subject": "DriverHealthChanged DPS message contract", "status": "pending", "blockedBy": ["5.1"]},
|
||||
{"id": "6.2", "subject": "Publish DriverHealthChanged from each driver actor (IDriverHealthPublisher)", "status": "pending", "blockedBy": ["6.1"]},
|
||||
{"id": "6.3", "subject": "DriverStatusHub", "status": "pending", "blockedBy": ["6.1"]},
|
||||
{"id": "6.4", "subject": "DriverStatusSignalRBridge + InMemoryDriverStatusSnapshotStore", "status": "pending", "blockedBy": ["6.2","6.3"]},
|
||||
{"id": "6.5", "subject": "DriverStatusPanel.razor + wire into all 9 driver pages", "status": "pending", "blockedBy": ["6.4"]},
|
||||
|
||||
{"id": "7.1", "subject": "IDriverProbe interface + TestDriverConnect messages", "status": "pending", "blockedBy": ["5.1"]},
|
||||
{"id": "7.2", "subject": "AdminOperationsActor handler for TestDriverConnect", "status": "pending", "blockedBy": ["7.1"]},
|
||||
{"id": "7.3", "subject": "TCP probes (Modbus, AbCip, AbLegacy, S7)", "status": "pending", "blockedBy": ["7.1"]},
|
||||
{"id": "7.4", "subject": "Specialty probes (FOCAS, TwinCAT, OPCUA, Galaxy, Historian)", "status": "pending", "blockedBy": ["7.1"]},
|
||||
{"id": "7.5", "subject": "AdminProbeService + DriverTestConnectButton.razor + wire into pages", "status": "pending", "blockedBy": ["7.2","7.3","7.4"]},
|
||||
|
||||
{"id": "8.1", "subject": "RestartDriver + ReconnectDriver messages + AdminOperationsActor handlers", "status": "pending", "blockedBy": ["6.5","7.5"]},
|
||||
{"id": "8.2", "subject": "DriverOperator authorization policy + docs/Security.md update", "status": "pending", "blockedBy": ["6.5"]},
|
||||
{"id": "8.3", "subject": "Wire Reconnect/Restart buttons into DriverStatusPanel", "status": "pending", "blockedBy": ["8.1","8.2"]},
|
||||
|
||||
{"id": "9.1", "subject": "DriverTagPicker.razor modal shell", "status": "pending", "blockedBy": ["5.1"]},
|
||||
{"id": "9.2", "subject": "Modbus address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.3", "subject": "AbCip address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.4", "subject": "AbLegacy address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.5", "subject": "S7 address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.6", "subject": "TwinCat address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.7", "subject": "FOCAS address picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.8", "subject": "OpcUaClient picker body (free-text NodeId)", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.9", "subject": "Galaxy picker body (free-text tag_name.AttributeName)", "status": "pending", "blockedBy": ["9.1"]},
|
||||
{"id": "9.10","subject": "Historian.Wonderware picker body + unit test", "status": "pending", "blockedBy": ["9.1"]},
|
||||
|
||||
{"id": "10.1", "subject": "DriverTestConnectE2eTests (Modbus/AbCip/S7 vs Docker sims)", "status": "pending", "blockedBy": ["8.3","9.10"]},
|
||||
{"id": "10.2", "subject": "DriverReconnectE2eTests", "status": "pending", "blockedBy": ["8.3","9.10"]},
|
||||
{"id": "10.3", "subject": "DriverStatusHubE2eTests", "status": "pending", "blockedBy": ["8.3","9.10"]},
|
||||
{"id": "10.4", "subject": "Manual smoke checklist (documented)", "status": "pending", "blockedBy": ["10.1","10.2","10.3"]}
|
||||
],
|
||||
"lastUpdated": "2026-05-28"
|
||||
}
|
||||
@@ -0,0 +1,313 @@
|
||||
# Live address browsers for OpcUaClient + Galaxy drivers — design
|
||||
|
||||
> **Status:** approved 2026-05-28. Implementation plan to follow via `writing-plans`.
|
||||
> **Builds on:** PR that shipped driver-specific AdminUI pages (commit `0d3ec46`).
|
||||
> Both `OpcUaClientAddressPickerBody.razor` and `GalaxyAddressPickerBody.razor` were
|
||||
> intentionally shipped as static stubs ("enter the string manually") with live
|
||||
> browse deferred to this follow-up.
|
||||
|
||||
**Goal:** Add lazy, ad-hoc browse trees to the OpcUaClient and Galaxy address pickers in the AdminUI, so operators can navigate the remote server's (or galaxy's) hierarchy and pick an address rather than typing it.
|
||||
|
||||
**Architecture:** A new `IDriverBrowser` abstraction registered per driver type (parallel to the runtime's `IDriverProbe`), with implementations housed in sibling `*.Browser` projects under `src/Drivers/`. AdminUI owns the live browse sessions in-process via a `BrowseSessionRegistry` singleton with a 2-minute idle TTL and an `IHostedService` reaper. Razor picker bodies talk to a scoped `IBrowserSessionService`; no actor messages on the hot path.
|
||||
|
||||
**Tech stack:** .NET 10 / Blazor Server / OPCFoundation.NetStandard.Opc.Ua.Client / `ZB.MOM.WW.MxGateway.Client` (sibling repo, lazy-browse API already shipped).
|
||||
|
||||
---
|
||||
|
||||
## 1. Architecture
|
||||
|
||||
### Abstraction
|
||||
|
||||
```csharp
|
||||
// Commons (shared)
|
||||
public interface IDriverBrowser {
|
||||
string DriverType { get; } // "OpcUaClient", "Galaxy", ...
|
||||
Task<IBrowseSession> OpenAsync(string configJson, CancellationToken ct);
|
||||
}
|
||||
|
||||
public interface IBrowseSession : IAsyncDisposable {
|
||||
Guid Token { get; }
|
||||
DateTime LastUsedUtc { get; }
|
||||
Task<IReadOnlyList<BrowseNode>> RootAsync(CancellationToken ct);
|
||||
Task<IReadOnlyList<BrowseNode>> ExpandAsync(string nodeId, CancellationToken ct);
|
||||
Task<IReadOnlyList<AttributeInfo>> AttributesAsync(string nodeId, CancellationToken ct); // empty for OPC UA
|
||||
}
|
||||
|
||||
public sealed record BrowseNode(
|
||||
string NodeId, // address persisted on commit
|
||||
string DisplayName,
|
||||
BrowseNodeKind Kind, // Folder | Leaf
|
||||
bool HasChildrenHint);
|
||||
|
||||
public sealed record AttributeInfo(
|
||||
string Name, // e.g. "DownloadPath"
|
||||
string DriverDataType,
|
||||
bool IsArray,
|
||||
string SecurityClass); // FreeAccess | Operate | Tune | Configure | ViewOnly
|
||||
|
||||
public enum BrowseNodeKind { Folder, Leaf }
|
||||
```
|
||||
|
||||
### Session lifecycle
|
||||
|
||||
1. Razor picker body calls `BrowserSessionService.OpenAsync(driverType, formJson)`
|
||||
2. Service resolves `IDriverBrowser` from DI by driver type, calls `OpenAsync(json)`
|
||||
3. Returns `IBrowseSession`; service registers it in `BrowseSessionRegistry` under a new `Guid` token
|
||||
4. Razor stores token, calls `RootAsync(token)` to populate the initial tree
|
||||
5. Each subsequent expand-click calls `ExpandAsync(token, nodeId)`
|
||||
6. Picker body's `IAsyncDisposable.DisposeAsync` fires `CloseAsync(token)` on tear-down
|
||||
7. `BrowseSessionReaper` (`IHostedService`) ticks every 30s, evicts any session where `(UtcNow - LastUsedUtc) > 2 min`, awaits `DisposeAsync`
|
||||
|
||||
The session genuinely has no value to other cluster nodes — it's tied to one circuit. Hosting it in-process avoids cross-cluster Ask latency on every folder click.
|
||||
|
||||
---
|
||||
|
||||
## 2. Components
|
||||
|
||||
### New projects
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser/` | OPC UA browser impl + session |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser/` | Galaxy browser impl + session |
|
||||
| `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Browser.Tests/` | Unit tests (use opc-plc fixture) |
|
||||
| `tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.Tests/` | Unit tests (fake transport) |
|
||||
|
||||
Driver-specific browsers live in **sibling** projects so AdminUI doesn't drag the runtime `Driver.*` projects (and their full SDK chains) through a transitive reference.
|
||||
|
||||
### New abstractions
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Browsing/IDriverBrowser.cs` | Per-driver factory |
|
||||
| `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Browsing/IBrowseSession.cs` | Session contract |
|
||||
| `src/Core/ZB.MOM.WW.OtOpcUa.Commons/Browsing/BrowseNode.cs` | + `BrowseNodeKind` enum + `AttributeInfo` |
|
||||
|
||||
### AdminUI plumbing
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `src/Server/.../AdminUI/Browsing/BrowseSessionRegistry.cs` | Singleton, `ConcurrentDictionary<Guid, IBrowseSession>` |
|
||||
| `src/Server/.../AdminUI/Browsing/BrowseSessionReaper.cs` | `IHostedService`, 30s tick, 2 min idle TTL |
|
||||
| `src/Server/.../AdminUI/Browsing/IBrowserSessionService.cs` | Scoped DI service for Razor |
|
||||
| `src/Server/.../AdminUI/Browsing/BrowserSessionService.cs` | Impl: resolve driver, register session, enforce per-call timeouts |
|
||||
| `src/Server/.../AdminUI/Components/Shared/Drivers/DriverBrowseTree.razor` | Shared lazy tree component with per-node text filter |
|
||||
|
||||
### Modified files
|
||||
|
||||
| Path | Change |
|
||||
|---|---|
|
||||
| `src/Server/.../Pickers/OpcUaClientAddressPickerBody.razor` | Add Browse button + DriverBrowseTree; keep manual entry |
|
||||
| `src/Server/.../Pickers/GalaxyAddressPickerBody.razor` | Same shape + side-panel for attribute pick |
|
||||
| `src/Server/.../AdminUI/Program.cs` | Register `IDriverBrowser` services + registry + reaper |
|
||||
| `src/Drivers/.../OpcUaClient.Contracts/NamespaceMap.cs` | Extract from runtime `Driver.OpcUaClient` for shared use |
|
||||
| `ZB.MOM.WW.OtOpcUa.slnx` | Add the four new projects |
|
||||
|
||||
---
|
||||
|
||||
## 3. Data flow
|
||||
|
||||
**Open → tree → pick** (OpcUaClient as worked example; Galaxy identical except attribute side-panel before commit):
|
||||
|
||||
```
|
||||
Razor picker body BrowserSessionService IDriverBrowser Remote
|
||||
| | | |
|
||||
click Browse ────────► OpenAsync(driverType, json) ─► OpenAsync(json) ────────► connect + activate session
|
||||
| ◄──────────────── token (Guid) ◄───── ISession |
|
||||
| | | |
|
||||
render tree ─────────► RootAsync(token) ─────────────► session.RootAsync ─────► BrowseAsync(ObjectsFolder)
|
||||
| ◄──────────────── BrowseNode[] ◄───── refs |
|
||||
| | | |
|
||||
click folder ────────► ExpandAsync(token, nodeId) ──► session.ExpandAsync ───► BrowseAsync(nodeId)
|
||||
| ◄──────────────── BrowseNode[] ◄───── refs |
|
||||
| | | |
|
||||
click leaf + commit ─► CloseAsync(token) ─────────► session.DisposeAsync ───► CloseSession
|
||||
| | | |
|
||||
```
|
||||
|
||||
**Galaxy two-stage attribute pick:** after the user selects an object (Folder) in the tree, the picker body calls `AttributesAsync(token, tagName)` and renders the result as a side-panel. The user picks an attribute; the committed address is `tag_name.AttributeName`.
|
||||
|
||||
**Stable address format:**
|
||||
- OpcUaClient: `nsu=<uri>;<localid>` via `NamespaceMap.ToStableReference` — survives remote namespace-table reorder across restarts
|
||||
- Galaxy: `tag_name` (the globally unique system name) — already stable by definition
|
||||
|
||||
**Per-node text filter:** purely client-side over the already-loaded `node.Children`. No round-trip on filter input.
|
||||
|
||||
---
|
||||
|
||||
## 4. OpcUaClient browser specifics
|
||||
|
||||
### Connection
|
||||
- Reuses `OpcUaClientDriverOptions` (deserialize with `UnmappedMemberHandling.Skip`)
|
||||
- Builds a **separate** `ApplicationConfiguration` from the runtime driver — PKI root at `%LocalAppData%/OtOpcUa/adminui-browse-pki/` (separate cert store)
|
||||
- `ApplicationName = "OtOpcUa AdminUI Browse"`, `ApplicationUri = "urn:OtOpcUa:AdminUI:Browse"`
|
||||
- Endpoint selection: same `DiscoveryClient.GetEndpointsAsync` → filter `(policy, mode)` as the runtime driver
|
||||
- One endpoint only (no failover) — interactive use; user retries with different URL on failure
|
||||
- Bounded by `OpcUaClientDriverOptions.PerEndpointConnectTimeout` (clamped [5, 30]s)
|
||||
|
||||
### Namespace map
|
||||
- `NamespaceMap` class extracted to `OpcUaClient.Contracts` so both runtime and Browser projects share one impl
|
||||
- Browser builds the map from the live session on open; uses `ToStableReference` for outbound NodeIds; uses `TryResolve` for inbound
|
||||
|
||||
### Lazy browse
|
||||
- One level per click using `Session.BrowseAsync` + `BrowseNextAsync` continuation-point loop
|
||||
- `BrowseDescriptionCollection` filters to `NodeClass.Object | NodeClass.Variable`, `ResultMask = BrowseName | DisplayName | NodeClass`
|
||||
- `BrowseNode.HasChildrenHint = (Kind == Folder)` — heuristic; saves a per-node round-trip
|
||||
- Inside-session calls guarded by `SemaphoreSlim _gate` (same pattern as runtime driver — OPC UA `Session.BrowseAsync` not thread-safe)
|
||||
|
||||
### Cert handling
|
||||
- `AutoAcceptCertificates = true` honored with parity to runtime + log warning + per-session unwire on dispose
|
||||
- `AutoAcceptCertificates = false` + untrusted cert → `OpenAsync` fails with SDK error message in the UI
|
||||
|
||||
### Reconnect handling
|
||||
- None. Browse sessions are short-lived (2 min idle TTL). Keep-alive failure → UI surfaces error chip → user re-clicks Browse.
|
||||
|
||||
---
|
||||
|
||||
## 5. Galaxy browser specifics
|
||||
|
||||
### Connection
|
||||
- Reuses `GalaxyDriverOptions` (deserialize with `UnmappedMemberHandling.Skip`)
|
||||
- Opens `MxGatewaySession` with `ClientName = "OtOpcUa-AdminUI-Browse"` — distinct from runtime driver's name so the gateway can attribute load
|
||||
- Per-call gateway client built via `session.GalaxyRepository(opts.GalaxyName)`
|
||||
|
||||
### Lazy browse
|
||||
- Root: `client.BrowseAsync(new BrowseChildrenOptions(), ct)` → `IReadOnlyList<LazyBrowseNode>`
|
||||
- Expand: cached `LazyBrowseNode` lookup by `tag_name`, then `node.ExpandAsync(ct)` (gateway client handles paging internally)
|
||||
- No internal gate — `LazyBrowseNode.ExpandAsync` already has its own lock; gateway client is thread-safe across distinct calls
|
||||
|
||||
### Two-stage attribute pick
|
||||
- Galaxy `BrowseNode.Kind` is always `Folder` — leaves don't exist at tree level
|
||||
- When the user clicks an object node, picker body calls `AttributesAsync(token, tagName)` and shows the result as a side-panel listing `(Name, DriverDataType, IsArray, SecurityClass)`
|
||||
- On attribute click, committed address is `$"{tagName}.{attrName}"`
|
||||
- Backing call: either `BrowseChildrenOptions { IncludeAttributes = true }` filtered to the GobjectId, or a dedicated `GetAttributesAsync(GobjectId, ct)` — to be confirmed during plan write against the gateway client surface
|
||||
|
||||
### Filters in v1
|
||||
- Per-node text filter (client-side) for tree navigation
|
||||
- Server-side filters (`TagNameGlob`, `AlarmBearingOnly`, `HistorizedOnly`) deferred to a follow-up — easy to add later without breaking the wire (the session is constructed today with `new BrowseChildrenOptions()`)
|
||||
|
||||
---
|
||||
|
||||
## 6. Error handling, timeouts, TTL
|
||||
|
||||
### Failures
|
||||
- `OpenAsync` → catches `Exception`, logs Info, returns typed `BrowseOpenResult(Ok: false, Message, Token: Empty)`. UI shows red chip with truncated SDK message
|
||||
- `ExpandAsync` / `AttributesAsync` → same shape per-call. Failed branch shows error chip; rest of tree intact; session stays alive
|
||||
- `BrowseSessionNotFoundException` when token unknown (session reaped or never existed)
|
||||
|
||||
### Timeouts
|
||||
- Per-call expand/attributes: **20 s** via `CTS.CreateLinkedTokenSource(callerCt)` in `BrowserSessionService`
|
||||
- Session open: **30 s** ceiling; OPC UA reuses `PerEndpointConnectTimeout` (default 10 s), Galaxy hardcodes 30 s for `MxGatewaySession.OpenAsync`
|
||||
|
||||
### TTL & reaping
|
||||
- `LastUsedUtc` set on every `RootAsync`/`ExpandAsync`/`AttributesAsync`
|
||||
- Reaper: `IHostedService` with `PeriodicTimer(30s)`. On each tick: snapshot keys; for any session with `(UtcNow - LastUsedUtc) > 120s`: `TryRemove` then `await DisposeAsync` outside the dictionary
|
||||
- Concurrent `ExpandAsync` racing eviction → caller catches closed-session error → service translates to `BrowseSessionNotFoundException`
|
||||
- On AdminUI shutdown: `StopAsync` walks the registry once and disposes all sessions
|
||||
|
||||
### Concurrency
|
||||
- `BrowseSessionRegistry` = `ConcurrentDictionary<Guid, IBrowseSession>` — no extra lock
|
||||
- OpcUaClient session serializes browse on `SemaphoreSlim`; Galaxy session relies on its internal locks
|
||||
|
||||
### Component dispose
|
||||
- Razor picker body implements `IAsyncDisposable`
|
||||
- Fires `CloseAsync(token)` fire-and-forget (no await) so circuit teardown isn't blocked by a gRPC roundtrip
|
||||
- Reaper is the safety net if dispose doesn't fire
|
||||
|
||||
### Logging
|
||||
- Serilog. Info at open + close, Debug at close-with-reason (`user-close | idle-ttl | shutdown`), Info on failure
|
||||
- No per-expand logging (noise)
|
||||
|
||||
### Audit trail
|
||||
- None — browse is read-only and doesn't mutate config or driver state (matches probe pattern)
|
||||
|
||||
---
|
||||
|
||||
## 7. Security & auth
|
||||
|
||||
### Role gating
|
||||
- Browse button gated by existing `DriverOperator` LDAP policy — same as Reconnect/Restart in `DriverStatusPanel`
|
||||
- Picker bodies check policy in `OnInitializedAsync` via `IAuthorizationService` and `AuthenticationStateProvider`
|
||||
- Manual entry stays available regardless of role
|
||||
|
||||
### Credentials in JSON
|
||||
- Form JSON posted to `BrowserSessionService.OpenAsync` contains plaintext passwords / API keys — same as the existing `TestDriverConnect` probe
|
||||
- JSON is deserialized into typed Options → used to build SDK config → both released; no `_lastConfigJson` cached field anywhere in the registry or session impls
|
||||
- Browse session tokens are `Guid.NewGuid()` and only ever cross the authenticated Blazor circuit
|
||||
|
||||
### Cert handling
|
||||
- `AutoAcceptCertificates = true` honored with log warning + per-session unwire on dispose
|
||||
- Browse PKI store separate from runtime PKI — browse-time accept doesn't poison the runtime driver's trust store
|
||||
|
||||
### Rate limiting
|
||||
- None. DriverOperator role gating + 2-minute TTL is the budget. A bad actor with DriverOperator already has Reconnect/Restart capability
|
||||
|
||||
### Multi-replica AdminUI
|
||||
- Sticky cookies (already configured via Traefik) pin a user to one replica → `BrowseSessionRegistry` is always co-located with the circuit that created the token
|
||||
- Failover → token invalid on new replica → UI re-opens gracefully
|
||||
|
||||
---
|
||||
|
||||
## 8. Testing
|
||||
|
||||
### Unit tests — per-driver browsers
|
||||
- `tests/Drivers/.../OpcUaClient.Browser.Tests/`: against opc-plc at `opc.tcp://10.100.0.35:50000`. `OpcUaClientBrowseSessionTests`, `OpcUaClientDriverBrowserTests` (bad endpoint, auth rejected, bad JSON)
|
||||
- `tests/Drivers/.../Galaxy.Browser.Tests/`: fake `IGalaxyRepositoryClientTransport` (precedent in gateway-client repo). `GalaxyBrowseSessionTests`, `GalaxyDriverBrowserTests`
|
||||
|
||||
### Unit tests — AdminUI plumbing (added to existing `tests/Server/AdminUI.Tests/`)
|
||||
- `BrowseSessionRegistryTests`: register/get/remove, concurrent registration
|
||||
- `BrowseSessionReaperTests`: virtual time, idle eviction, non-idle preservation, eviction-vs-in-flight-expand race
|
||||
- `BrowserSessionServiceTests`: open→root→expand→close, unknown driver type, per-call timeout enforced
|
||||
|
||||
### Component tests
|
||||
- `DriverBrowseTree` lazy-expand contract with fake `IBrowserSessionService`; per-node filter filters DOM but does not call ExpandAsync; click caching
|
||||
- Picker bodies: Browse button hidden when `!_canOperate`; manual entry still works
|
||||
|
||||
### Integration tests (opt-in, fixture-gated)
|
||||
- `tests/Drivers/.../OpcUaClient.Browser.IntegrationTests/`: end-to-end against opc-plc, 3-level expand + round-trip resolve. Skipped unless `OPCUA_SIM_ENDPOINT` set
|
||||
- No Galaxy integration suite in v1 (requires wonder-app-vd03; deferred)
|
||||
|
||||
### Specific regression tests
|
||||
- Namespace-stable round-trip: open → browse → take returned NodeId string → `ExpandAsync(string)` → must resolve back to same NodeId
|
||||
- TTL reaper racing live ExpandAsync: `TryRemove` while expand is in-flight → safe, translates to `BrowseSessionNotFoundException`
|
||||
|
||||
### Verification at PR time
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` clean
|
||||
- `dotnet test tests/Server/.../AdminUI.Tests/` green (existing 51 + new ~12)
|
||||
- `dotnet test tests/Drivers/.../OpcUaClient.Browser.Tests/` with `lmxopcua-fix up opcuaclient`
|
||||
- `dotnet test tests/Drivers/.../Galaxy.Browser.Tests/` (no fixture)
|
||||
- Manual smoke: run AdminUI, edit an OpcUaClient driver, click Browse against opc-plc, pick a variable, verify the stored NodeId reads cleanly via Client CLI
|
||||
|
||||
---
|
||||
|
||||
## 9. Implementation sequencing (for plan-writing)
|
||||
|
||||
Suggested phase split — each phase shippable + reviewable independently:
|
||||
|
||||
1. **Phase 1 — Abstractions.** Add `IDriverBrowser`, `IBrowseSession`, `BrowseNode`, `AttributeInfo`, `BrowseNodeKind` to Commons. Empty build.
|
||||
2. **Phase 2 — Extract NamespaceMap.** Move from runtime `Driver.OpcUaClient` to `Driver.OpcUaClient.Contracts`; update runtime ref.
|
||||
3. **Phase 3 — OpcUaClient browser.** New `Driver.OpcUaClient.Browser` project; impl + unit tests against opc-plc.
|
||||
4. **Phase 4 — Galaxy browser.** New `Driver.Galaxy.Browser` project; impl + unit tests with fake transport. Confirm attribute-fetch API surface on `GalaxyRepositoryClient`.
|
||||
5. **Phase 5 — AdminUI plumbing.** `BrowseSessionRegistry`, `BrowseSessionReaper`, `BrowserSessionService`, DI wire-up in `Program.cs`. Unit tests.
|
||||
6. **Phase 6 — Shared `DriverBrowseTree.razor`.** Lazy tree component with per-node filter. Component tests with fake service.
|
||||
7. **Phase 7 — Wire pickers.** Update `OpcUaClientAddressPickerBody.razor` and `GalaxyAddressPickerBody.razor` to use `DriverBrowseTree` + DriverOperator gating + (Galaxy) attribute side-panel. Manual smoke test.
|
||||
8. **Phase 8 — Integration test + docs.** Opt-in opc-plc integration suite, design doc cross-references in `docs/`, `CLAUDE.md` (or `docs/security.md`) updates if needed.
|
||||
|
||||
---
|
||||
|
||||
## Decisions table
|
||||
|
||||
| # | Decision | Rationale |
|
||||
|---|---|---|
|
||||
| 1 | Ad-hoc browse using form JSON | Mirrors `TestDriverConnect` probe; works for new drafts and existing drivers uniformly |
|
||||
| 2 | Tree + lazy load both drivers | Galaxy gateway just shipped `LazyBrowseNode.ExpandAsync` — symmetric UX possible |
|
||||
| 3 | AdminUI-hosted via `IDriverBrowser` factory | Browse is interactive (≥10 calls/session); cross-cluster Ask hop would multiply latency; session has no value to other nodes |
|
||||
| 4 | Sibling `*.Browser` projects | Keep AdminUI from pulling runtime `Driver.*` projects' SDK chains |
|
||||
| 5 | `NamespaceMap` to `OpcUaClient.Contracts` | Shared between runtime + browser, no new project needed |
|
||||
| 6 | Separate browse PKI store | Browse-time cert accept must not poison runtime driver's trust store |
|
||||
| 7 | Per-node client-side text filter (v1) | Quick UX win; server-side filters deferred |
|
||||
| 8 | 2 min idle TTL, 30s reaper tick | Matches typical user cadence; bounds resource exposure |
|
||||
| 9 | 20 s per-call / 30 s open timeouts | Interactive feel; longer hangs almost always mean broken remote |
|
||||
| 10 | DriverOperator role gating | Live remote connection is operationally privileged; matches Reconnect/Restart precedent |
|
||||
| 11 | No audit trail | Browse is read-only; matches probe pattern |
|
||||
| 12 | Galaxy two-stage attribute side-panel | One modal, no extra clicks vs. two-modal flow |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-28-driver-browsers-plan.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: Phase 1 — Add IDriverBrowser/IBrowseSession/BrowseNode to Commons", "status": "pending"},
|
||||
{"id": 2, "subject": "Task 2: Phase 2 — Extract NamespaceMap to OpcUaClient.Contracts", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: Phase 3a — Scaffold Driver.OpcUaClient.Browser project", "status": "pending", "blockedBy": [2]},
|
||||
{"id": 4, "subject": "Task 4: Phase 3b — Implement OpcUaClientBrowseSession", "status": "pending", "blockedBy": [3]},
|
||||
{"id": 5, "subject": "Task 5: Phase 3c — Implement OpcUaClientDriverBrowser factory", "status": "pending", "blockedBy": [4]},
|
||||
{"id": 6, "subject": "Task 6: Phase 3d — OpcUaClient.Browser tests (opc-plc fixture)", "status": "pending", "blockedBy": [5]},
|
||||
{"id": 7, "subject": "Task 7: Phase 4a — Scaffold Driver.Galaxy.Browser project", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 8, "subject": "Task 8: Phase 4b — Implement GalaxyBrowseSession", "status": "pending", "blockedBy": [7]},
|
||||
{"id": 9, "subject": "Task 9: Phase 4c — Implement GalaxyDriverBrowser factory", "status": "pending", "blockedBy": [8]},
|
||||
{"id": 10, "subject": "Task 10: Phase 4d — Galaxy.Browser tests (fake transport)", "status": "pending", "blockedBy": [9]},
|
||||
{"id": 11, "subject": "Task 11: Phase 5a — BrowseSessionRegistry + reaper + service", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 12, "subject": "Task 12: Phase 5b — Wire DI in AddAdminUI()", "status": "pending", "blockedBy": [5, 9, 11]},
|
||||
{"id": 13, "subject": "Task 13: Phase 5c — Tests for registry, reaper, service", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 14, "subject": "Task 14: Phase 6 — Shared DriverBrowseTree.razor", "status": "pending", "blockedBy": [12]},
|
||||
{"id": 15, "subject": "Task 15: Phase 7a — Wire OpcUaClient picker to browser", "status": "pending", "blockedBy": [14]},
|
||||
{"id": 16, "subject": "Task 16: Phase 7b — Wire Galaxy picker + attribute side-panel", "status": "pending", "blockedBy": [14]},
|
||||
{"id": 17, "subject": "Task 17: Phase 8a — opc-plc integration test", "status": "pending", "blockedBy": [6]},
|
||||
{"id": 18, "subject": "Task 18: Phase 8b — Manual smoke + CLAUDE.md update", "status": "pending", "blockedBy": [13, 15, 16, 17]}
|
||||
],
|
||||
"lastUpdated": "2026-05-28T00:00:00Z"
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
# Design — Complete AdminUI deferred follow-ups
|
||||
|
||||
**Date:** 2026-05-29
|
||||
**Status:** Approved (design); implementation plan to follow
|
||||
**Author:** Joseph Doherty (with Claude Code)
|
||||
|
||||
## Background
|
||||
|
||||
The AdminUI carried a family of "deferred / Phase C.2 follow-up" notes. A prior
|
||||
change stripped the stale *rendered roadmap banners* from the cluster list pages.
|
||||
Three remaining note groups were investigated to decide what real work they hide:
|
||||
|
||||
- **Group 1 — driver-page inline notes** ("list-editor coming in a follow-up
|
||||
phase" for tags/devices/endpoints; "typed-form-ifying Polly is a follow-up").
|
||||
→ **Real pending UI work.**
|
||||
- **Group 2 — RoleGrants** ("UI-driven editing of the mapping is deferred — it
|
||||
implies a config-reload mechanism that doesn't exist yet"). → **Real work; half
|
||||
the infra already exists.**
|
||||
- **Group 3 — source comments** (F15 Razor migration, F16 FleetStatusHub bridge,
|
||||
"Phase 4" identity section, `TODO(3.3/3.4)` route collision). → **~90% stale**;
|
||||
the referenced work already shipped (the F16 bridge is wired; the legacy
|
||||
`DriverEdit.razor` no longer exists). Only the Polly typed form is real, and it
|
||||
is already counted in Group 1.
|
||||
|
||||
### Key facts established during exploration
|
||||
|
||||
- **Driver-embedded tag/device lists in `DriverConfig` JSON are the runtime source
|
||||
of truth.** Driver factories deserialize them and poll exactly those rows; the
|
||||
canonical `Tag` table is orthogonal (OPC UA browse-tree only, never read by
|
||||
drivers). So inline editors are meaningful, not redundant — editing them changes
|
||||
what the driver polls on the next publish/reinitialize.
|
||||
- **Resilience** already has a strongly-typed model: `DriverResilienceOptions`
|
||||
(`BulkheadMaxConcurrent`, `BulkheadMaxQueue`, `RecycleIntervalSeconds`,
|
||||
`CapabilityPolicies: {DriverCapability → (TimeoutSeconds, RetryCount,
|
||||
BreakerFailureThreshold)}`) with tier A/B/C defaults via `GetTierDefaults(tier)`
|
||||
and a `DriverResilienceOptionsParser`. The stored JSON is an *override* shape;
|
||||
null/absent keys fall back to tier defaults.
|
||||
- **LDAP role map**: the `LdapGroupRoleMapping` entity + migration +
|
||||
`ILdapGroupRoleMappingService` (CRUD) already exist but are **not wired** into
|
||||
login. `LdapAuthService` still reads the static appsettings `GroupToRole`
|
||||
(`Dictionary<string,string>`). `RoleGrants.razor` is read-only.
|
||||
- **Testing**: no bUnit. Established pattern = test `FromOptions`/`ToOptions`
|
||||
round-trips (xUnit + Shouldly in `AdminUI.Tests`) and services with in-memory EF
|
||||
(`Configuration.Tests`).
|
||||
|
||||
## Decisions
|
||||
|
||||
- **Scope:** full build — all real follow-ups in Groups 1 & 2, plus Group 3
|
||||
comment cleanup.
|
||||
- **List-editor UX:** modal-per-row with a shared shell component.
|
||||
- **LDAP reload semantics:** DB-backed, **live on the user's next sign-in**
|
||||
(per-login DB query; no restart, no new infra). appsettings `GroupToRole` becomes
|
||||
a bootstrap **fallback** layer.
|
||||
- **Roles are GLOBAL.** No cluster-level permissions / no per-cluster enforcement
|
||||
(explicitly chosen for simplicity, reversing an earlier cluster-scoping answer).
|
||||
Every `LdapGroupRoleMapping` row is `IsSystemWide=true`, `ClusterId=null`.
|
||||
|
||||
## Workstreams
|
||||
|
||||
### WS1 — Driver collection editors (modal-per-row + shared shell)
|
||||
|
||||
- New generic `CollectionEditor<TRow>` component in `Components/Shared/Drivers/`:
|
||||
compact read-only table + `[+ Add]` / per-row `Edit` / `Delete`, and a Bootstrap
|
||||
modal editing a **working copy** of a row (commit on modal-Save, discard on
|
||||
Cancel). Parameters: `List<TRow> Items` (bound), header fragment, read-only-cells
|
||||
fragment, modal-body fragment, `NewRow` factory, optional `Validate` delegate.
|
||||
- Each driver page swaps its read-only `<pre>` for a `CollectionEditor` supplying
|
||||
its own columns + modal fields. Edits mutate the in-memory `List<T>` already in
|
||||
the page's `FormModel`; the page's existing **Save** serializes it into
|
||||
`DriverConfig` — no new persistence path.
|
||||
- Coverage: tags (Modbus, AbCip, AbLegacy, TwinCAT, S7, FOCAS); devices (AbCip,
|
||||
AbLegacy, TwinCAT, FOCAS); endpoints (OpcUaClient).
|
||||
- **Errors/validation:** required fields, duplicate Name within list,
|
||||
driver-specific address format; delete confirm; list mutates only on valid commit.
|
||||
- **Testing:** per-driver `NewRow` factories + `Validate` methods unit-tested
|
||||
directly; existing `*FormSerializationTests` extended for add/remove via the form
|
||||
model. Modal interaction verified manually via `/run`.
|
||||
|
||||
### WS2 — Resilience typed form
|
||||
|
||||
- Replace the textarea in `DriverResilienceSection.razor` with a typed form bound to
|
||||
a new mutable `ResilienceFormModel` (all fields nullable; null = tier default):
|
||||
bulkhead concurrent/queue, recycle interval, and an 8-capability grid (Read,
|
||||
Write, Discover, Subscribe, Probe, AlarmSubscribe, AlarmAcknowledge, HistoryRead)
|
||||
of (timeout / retry / breaker-threshold).
|
||||
- `FromJson`/`ToJson` emit only non-null overrides (blank → `null`, preserving the
|
||||
current "null = tier defaults" contract). The section gains a `DriverTier`
|
||||
parameter; each driver page passes its known tier so `GetTierDefaults(tier)`
|
||||
renders as placeholders. A collapsible "raw JSON" view remains as escape hatch.
|
||||
- **Errors:** non-negative / sane-range numeric validation; emitted JSON must
|
||||
re-parse cleanly through `DriverResilienceOptionsParser`.
|
||||
- **Testing:** `ResilienceFormModel` round-trip tests in `AdminUI.Tests` —
|
||||
blank→null, partial-override-preserved, emit→parse-back compatibility.
|
||||
|
||||
### WS3 — Editable LDAP→role map (DB-backed, global, live on next sign-in)
|
||||
|
||||
- `RoleGrants.razor` → full CRUD over `LdapGroupRoleMapping` via the existing
|
||||
`ILdapGroupRoleMappingService`. **Global only**: `IsSystemWide=true`,
|
||||
`ClusterId=null`; no cluster UI. Fields: LDAP group, `AdminRole`
|
||||
(ConfigViewer/ConfigEditor/FleetAdmin), notes. A group may carry several roles
|
||||
(multiple rows). Edit page gated to **FleetAdmin** (add a minimal FleetAdmin
|
||||
authorization policy; confirm existing role-policy plumbing during plan-writing).
|
||||
- Wire the service into `LdapAuthService`: at login → resolve groups →
|
||||
`GetByGroupsAsync` (indexed) → map roles → **merge appsettings `GroupToRole` as a
|
||||
fallback layer** (used when no DB row covers a group). Edits take effect on the
|
||||
user's next sign-in. DB rows authoritative + editable; appsettings entries shown
|
||||
read-only as "fallback."
|
||||
- **Errors:** DB unreachable at login → catch, log, fall back to appsettings;
|
||||
login never blocks. CRUD: no duplicate `(LdapGroup, Role)`; group/role required.
|
||||
- **Testing:** extend `LdapGroupRoleMappingServiceTests` (in-memory EF) for CRUD +
|
||||
dedupe; new `RoleMapper` overload `Map(groups, dbRows, fallbackDict)` unit-tested
|
||||
for merge + fallback precedence + DB-error fallback.
|
||||
|
||||
### WS4 — Cleanup (runs last, after the features exist)
|
||||
|
||||
- **Delete stale comments:** `FleetStatusHub.cs` ("passive channel / until the
|
||||
bridge lands"), `EndpointRouteBuilderExtensions.cs` (F15), `DriverIdentitySection.razor`
|
||||
("Phase 4 / generic DriverEdit"), `DriverEditRouter.razor` + `DriverTypePicker.razor`
|
||||
(`TODO(3.3/3.4)` + the "falls back to legacy DriverEdit" path — verify & clean,
|
||||
legacy file is gone), and update `DriverResilienceSection.razor`'s comment.
|
||||
- **Strip rendered notes** now true: per-driver "list-editor coming in a follow-up
|
||||
phase" notes, the OpcUaClient endpoint note, the resilience "typed-form-ifying
|
||||
Polly is a follow-up" note, and the RoleGrants "UI-driven editing is deferred" note.
|
||||
|
||||
## Cross-cutting
|
||||
|
||||
- **No DB schema change** — `LdapGroupRoleMapping` migration already applied;
|
||||
`DriverConfig`/`ResilienceConfig` columns unchanged.
|
||||
- **Definition of done:** build clean + `dotnet test` green + a `/run` pass
|
||||
exercising the modal editors and role-map CRUD.
|
||||
- **Suggested sequence:** WS1 shared shell + Modbus tags as proof → remaining
|
||||
drivers → WS2 → WS3 → WS4.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-29-adminui-followups.md",
|
||||
"branch": "feat/adminui-followups",
|
||||
"tasks": [
|
||||
{"id": 11, "plan": 1, "subject": "Task 1: Generic CollectionEditor<TRow> component", "status": "pending"},
|
||||
{"id": 12, "plan": 2, "subject": "Task 2: Modbus tag editor (proof) + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 13, "plan": 3, "subject": "Task 3: AbCip device+tag editors + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 14, "plan": 4, "subject": "Task 4: AbLegacy device+tag editors + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 15, "plan": 5, "subject": "Task 5: TwinCAT device+tag editors + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 16, "plan": 6, "subject": "Task 6: FOCAS device+tag editors + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 17, "plan": 7, "subject": "Task 7: S7 tag editor + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 18, "plan": 8, "subject": "Task 8: OpcUaClient endpoint-URL editor + tests", "status": "pending", "blockedBy": [11]},
|
||||
{"id": 19, "plan": 9, "subject": "Task 9: ResilienceFormModel + tests", "status": "pending"},
|
||||
{"id": 20, "plan": 10, "subject": "Task 10: Typed resilience form in DriverResilienceSection", "status": "pending", "blockedBy": [19]},
|
||||
{"id": 21, "plan": 11, "subject": "Task 11: RoleMapper.Merge overload + tests", "status": "pending"},
|
||||
{"id": 22, "plan": 12, "subject": "Task 12: Register ILdapGroupRoleMappingService in DI", "status": "pending"},
|
||||
{"id": 23, "plan": 13, "subject": "Task 13: Wire DB merge into AuthEndpoints.LoginAsync", "status": "pending", "blockedBy": [21, 22]},
|
||||
{"id": 24, "plan": 14, "subject": "Task 14: Add FleetAdmin authorization policy", "status": "pending"},
|
||||
{"id": 25, "plan": 15, "subject": "Task 15: RoleGrants.razor global CRUD (FleetAdmin-gated)", "status": "pending", "blockedBy": [22, 24]},
|
||||
{"id": 26, "plan": 16, "subject": "Task 16: LdapGroupRoleMapping service tests (global CRUD)", "status": "pending"},
|
||||
{"id": 27, "plan": 17, "subject": "Task 17: Delete stale source comments", "status": "pending", "blockedBy": [12, 13, 14, 15, 16, 17, 18, 20, 25]},
|
||||
{"id": 28, "plan": 18, "subject": "Task 18: Strip now-true rendered notes", "status": "pending", "blockedBy": [12, 13, 14, 15, 16, 17, 18, 25]},
|
||||
{"id": 29, "plan": 19, "subject": "Task 19: Full verification (build + test + /run)", "status": "pending", "blockedBy": [20, 23, 26, 27, 28]}
|
||||
],
|
||||
"lastUpdated": "2026-05-29"
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
# Auth/login alignment with ScadaBridge — design
|
||||
|
||||
> **Status:** approved 2026-05-29. Implementation plan to follow via `writing-plans`.
|
||||
> **Trigger:** browser hitting `http://localhost:9200/` rendered Chrome's `HTTP_RESPONSE_CODE_FAILURE` page because the cookie scheme's `OnRedirectToLogin` event was overridden to return 401 with no body, and the parallel JwtBearer scheme stamped `WWW-Authenticate: Bearer`. ScadaBridge sets `LoginPath` and lets the framework do its built-in browser-vs-AJAX heuristic; OtOpcUa diverged.
|
||||
|
||||
**Goal:** Restore default browser-redirect ergonomics on protected GETs, retire the unused JwtBearer server-side scheme, and externalize cookie config — bringing OtOpcUa's auth structure into parity with ScadaBridge.
|
||||
|
||||
**Architecture:** Single Cookie auth scheme. The JWT keeps minting (via `JwtTokenService`) and validating (in `CookieAuthenticationStateProvider`) as the **cookie payload only**; no `AddJwtBearer`, no parallel `Authorization: Bearer` validation. Cookie config (`Name`, `ExpiryMinutes`, `RequireHttpsCookie`) flows through the existing-but-unused `OtOpcUaCookieOptions` via a `Configure<IOptions<OtOpcUaCookieOptions>, ILoggerFactory>` PostConfigure step — same pattern ScadaBridge uses.
|
||||
|
||||
**Tech stack:** .NET 10 / ASP.NET Core / `Microsoft.AspNetCore.Authentication.Cookies` only (drop `Microsoft.AspNetCore.Authentication.JwtBearer` from the wiring if its only remaining transitive use disappears with this change).
|
||||
|
||||
---
|
||||
|
||||
## 1. Architecture
|
||||
|
||||
### Schemes
|
||||
|
||||
| Before | After |
|
||||
|---|---|
|
||||
| Cookie (primary) + JwtBearer (parallel) | Cookie only |
|
||||
| `FallbackPolicy` lists both schemes | `FallbackPolicy` lists Cookie only |
|
||||
| `OnRedirectToLogin` overridden to 401 | default behavior: 302 for browsers, 401 for AJAX |
|
||||
| `OnRedirectToAccessDenied` overridden to 403 | default behavior: 302 to `/Account/AccessDenied` (404s today; matches ScadaBridge) |
|
||||
|
||||
### Cookie config — externalized via `OtOpcUaCookieOptions`
|
||||
|
||||
```csharp
|
||||
public sealed class OtOpcUaCookieOptions
|
||||
{
|
||||
public const string SectionName = "Security:Cookie";
|
||||
|
||||
public string Name { get; set; } = "ZB.MOM.WW.OtOpcUa.Auth";
|
||||
public int ExpiryMinutes { get; set; } = 30;
|
||||
public bool RequireHttpsCookie { get; set; } = true;
|
||||
}
|
||||
```
|
||||
|
||||
Wired into `CookieAuthenticationOptions` via:
|
||||
```csharp
|
||||
services.AddOptions<CookieAuthenticationOptions>(CookieAuthenticationDefaults.AuthenticationScheme)
|
||||
.Configure<IOptions<OtOpcUaCookieOptions>, ILoggerFactory>((cookieOpts, ourOpts, lf) =>
|
||||
{
|
||||
cookieOpts.Cookie.Name = ourOpts.Value.Name;
|
||||
cookieOpts.ExpireTimeSpan = TimeSpan.FromMinutes(ourOpts.Value.ExpiryMinutes);
|
||||
cookieOpts.SlidingExpiration = true;
|
||||
cookieOpts.Cookie.SecurePolicy = ourOpts.Value.RequireHttpsCookie
|
||||
? CookieSecurePolicy.Always
|
||||
: CookieSecurePolicy.SameAsRequest;
|
||||
if (!ourOpts.Value.RequireHttpsCookie)
|
||||
{
|
||||
lf.CreateLogger("ZB.MOM.WW.OtOpcUa.Security").LogWarning(
|
||||
"Security:Cookie:RequireHttpsCookie is DISABLED — auth cookie SecurePolicy is SameAsRequest. " +
|
||||
"Cookie travels in cleartext over plain HTTP. Dev-only.");
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
### Endpoint surface — unchanged
|
||||
|
||||
| Path | Auth | Behavior |
|
||||
|---|---|---|
|
||||
| `POST /auth/login` | AllowAnonymous | LDAP auth → SignInAsync(Cookie); JSON callers get 204 / 401 / 503, form posters get 302 + cookie |
|
||||
| `POST /auth/logout` | RequireAuthorization | SignOutAsync(Cookie) |
|
||||
| `GET /auth/ping` | AllowAnonymous (handler-returns 200/401) | Polled by Blazor every 60s |
|
||||
| `POST /auth/token` | RequireAuthorization | Mints JWT for hypothetical external callers (matches ScadaBridge — they keep this even without JwtBearer wired) |
|
||||
|
||||
### Cookie rename
|
||||
|
||||
Old: `OtOpcUa.Auth`. New: `ZB.MOM.WW.OtOpcUa.Auth`. Effect: all sessions in flight at deploy time are invisible to the new handler → users re-prompt for login on next protected GET. No security impact (the old cookie expires per its own sliding window; nothing reads it).
|
||||
|
||||
---
|
||||
|
||||
## 2. Components
|
||||
|
||||
### Files modified
|
||||
|
||||
| File | Change |
|
||||
|---|---|
|
||||
| `src/Server/.../Security/CookieOptions.cs` | Add `RequireHttpsCookie`; change `Name` default to `ZB.MOM.WW.OtOpcUa.Auth` |
|
||||
| `src/Server/.../Security/ServiceCollectionExtensions.cs` | Drop `using JwtBearer`; delete `ConfigureJwtBearerFromTokenService` class; drop `.AddJwtBearer` + its IPostConfigureOptions registration; drop `OnRedirectToLogin` / `OnRedirectToAccessDenied` overrides; add `LoginPath` + `LogoutPath`; add PostConfigure block consuming `OtOpcUaCookieOptions`; remove `JwtBearerDefaults.AuthenticationScheme` from `FallbackPolicy` builder |
|
||||
| `tests/Server/.../Security.Tests/AuthEndpointsIntegrationTests.cs` | Update the `Set-Cookie` assertion on the login-success test from `OtOpcUa.Auth=` → `ZB.MOM.WW.OtOpcUa.Auth=` |
|
||||
|
||||
### Files NOT modified
|
||||
|
||||
| File | Why |
|
||||
|---|---|
|
||||
| `Endpoints/AuthEndpoints.cs` | Endpoint contracts unchanged |
|
||||
| `Jwt/JwtTokenService.cs` | Still mints JWT into cookie payload |
|
||||
| `Blazor/CookieAuthenticationStateProvider.cs` | Still polls `/auth/ping` |
|
||||
| `Ldap/*` | Untouched |
|
||||
| Razor login page | POST target unchanged |
|
||||
| `appsettings*.json` | Defaults are production-safe; no required config edit |
|
||||
|
||||
### Tests added
|
||||
|
||||
Single new file or appended class in `tests/Server/.../Security.Tests/`:
|
||||
|
||||
```csharp
|
||||
public class AuthChallengeTests : AuthEndpointsTestBase
|
||||
{
|
||||
[Fact]
|
||||
public async Task Root_anonymous_browser_GET_redirects_to_login()
|
||||
{
|
||||
var client = NewClient(allowAutoRedirect: false);
|
||||
client.DefaultRequestHeaders.Accept.ParseAdd("text/html");
|
||||
var resp = await client.GetAsync("/", Ct);
|
||||
resp.StatusCode.ShouldBe(HttpStatusCode.Found); // 302
|
||||
resp.Headers.Location!.ToString().ShouldContain("/login");
|
||||
resp.Headers.Location.ToString().ShouldContain("ReturnUrl");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Root_anonymous_xhr_GET_returns_401()
|
||||
{
|
||||
var client = NewClient(allowAutoRedirect: false);
|
||||
client.DefaultRequestHeaders.Add("X-Requested-With", "XMLHttpRequest");
|
||||
var resp = await client.GetAsync("/", Ct);
|
||||
resp.StatusCode.ShouldBe(HttpStatusCode.Unauthorized);
|
||||
// Framework still writes a Location header alongside the 401 — AJAX clients ignore it.
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Framework reality vs. earlier hypothesis:** The ASP.NET Core cookie handler's `IsAjaxRequest` heuristic checks ONLY the `X-Requested-With: XMLHttpRequest` header, NOT the `Accept` content type. A request with `Accept: application/json` but no XHR header is classified as a browser → 302. The third test originally proposed (`Root_anonymous_json_GET_returns_401`) was dropped because it tests behavior the framework doesn't have. ScadaBridge accepts the same framework reality (it doesn't override the heuristic either).
|
||||
|
||||
### Package references
|
||||
|
||||
`src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj`: remove `<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" />` if grep confirms `JwtTokenService` doesn't itself need it (it uses `Microsoft.IdentityModel.Tokens` for validation parameters, separate package).
|
||||
|
||||
---
|
||||
|
||||
## 3. Data flow
|
||||
|
||||
### Anonymous browser hits `/`
|
||||
|
||||
```
|
||||
Browser → GET /
|
||||
Accept: text/html
|
||||
┌──> AuthN: no cookie → unauthenticated
|
||||
├──> AuthZ FallbackPolicy fails
|
||||
└──> Cookie HandleChallengeAsync:
|
||||
- Accept: text/html → browser
|
||||
- 302 Location: /login?ReturnUrl=%2F
|
||||
Browser → GET /login ← redirect followed; login page renders (AllowAnonymous)
|
||||
[user submits form]
|
||||
Browser → POST /auth/login Content-Type: application/x-www-form-urlencoded
|
||||
─── LoginAsync:
|
||||
- LDAP authenticate
|
||||
- SignInAsync(Cookie)
|
||||
- Set-Cookie: ZB.MOM.WW.OtOpcUa.Auth=...
|
||||
- 302 Location: / (or ReturnUrl)
|
||||
Browser → GET / cookie present → AuthZ passes → 200 + Razor render
|
||||
```
|
||||
|
||||
### XHR / fetch hits a protected endpoint without cookie
|
||||
|
||||
```
|
||||
fetch('/api/something') Accept: application/json
|
||||
X-Requested-With: XMLHttpRequest
|
||||
┌──> AuthN: no cookie → unauthenticated
|
||||
├──> AuthZ FallbackPolicy fails
|
||||
└──> Cookie HandleChallengeAsync:
|
||||
- not text/html → API client
|
||||
- 401 (no body, no Location)
|
||||
```
|
||||
|
||||
The cookie handler's built-in `IsAjaxRequest` heuristic is what makes this work — it looks for `X-Requested-With: XMLHttpRequest`. No custom event handler needed. Note: requests with only `Accept: application/json` (no XHR header) are classified as browsers → 302; AJAX callers should set the XHR header to get 401.
|
||||
|
||||
### Logout
|
||||
|
||||
```
|
||||
fetch('/auth/logout', POST) cookie present
|
||||
─── LogoutAsync (RequireAuthorization passes):
|
||||
- SignOutAsync(Cookie)
|
||||
- Set-Cookie: ZB.MOM.WW.OtOpcUa.Auth=; expires=...
|
||||
- 204 (or browser-form: 302 /login)
|
||||
```
|
||||
|
||||
### Old cookie ignored
|
||||
|
||||
Browser holds stale `OtOpcUa.Auth` from a session that predates the deploy. Cookie scheme is now configured for `ZB.MOM.WW.OtOpcUa.Auth` — old cookie is invisible. User treated as anonymous → 302 to `/login`. Old cookie sits in jar until its own sliding window expires (max 30 min); no security risk because nothing reads it.
|
||||
|
||||
### Blazor `/auth/ping` polling
|
||||
|
||||
```
|
||||
CookieAuthenticationStateProvider → GET /auth/ping every 60s
|
||||
cookie present → 200
|
||||
cookie expired/missing → 401
|
||||
Blazor → invalidates auth state → re-render → root [Authorize] fails
|
||||
→ Cookie HandleChallengeAsync → 302 /login
|
||||
```
|
||||
|
||||
Unchanged.
|
||||
|
||||
---
|
||||
|
||||
## 4. Error handling
|
||||
|
||||
| Surface | Behavior |
|
||||
|---|---|
|
||||
| Unknown `Accept` (`*/*`, missing, JSON) | Framework default: treated as non-AJAX → 302 to `/login`. The cookie handler's `IsAjaxRequest` only looks at `X-Requested-With`, NOT `Accept`. CLI tools that want a 401 should set `X-Requested-With: XMLHttpRequest`. |
|
||||
| `LoginAsync` bad creds | JSON: `401`. Form: `302 /login?error=…&returnUrl=…`. Handler-returned, unaffected by middleware changes. |
|
||||
| `LoginAsync` LDAP throws | `503 ServiceUnavailable`. Handler-returned. |
|
||||
| `LoginAsync` success | JSON: `204`. Form: `302 /` (or `ReturnUrl`). |
|
||||
| Cookie expires mid-request | Treated as anonymous → 302 to `/login` (browser) or 401 (AJAX). Active users kept alive by `SlidingExpiration = true`. |
|
||||
| `RequireHttpsCookie = false` over HTTPS | Cookie marked `SecurePolicy = SameAsRequest`. Misconfiguration risk; startup logs Warning every boot so it's audible. No validator-refused boot — default is `true`; dev compose explicitly opts out. |
|
||||
| Missing `Security:Cookie` section in config | `.Bind()` no-ops; defaults take over (`Name = ZB.MOM.WW.OtOpcUa.Auth`, `ExpiryMinutes = 30`, `RequireHttpsCookie = true`). Production-safe. |
|
||||
| `[Authorize(Policy="DriverOperator")]` denied for authenticated non-operator | Cookie handler redirects to default `AccessDeniedPath = "/Account/AccessDenied"` which 404s in OtOpcUa. Matches ScadaBridge; rare enough not to be a P0. Follow-up: add a minimal `/access-denied` Razor page. |
|
||||
|
||||
---
|
||||
|
||||
## 5. Testing
|
||||
|
||||
### Existing tests pass unchanged
|
||||
|
||||
- `Login_with_invalid_credentials_returns_401` — handler-returned, unaffected
|
||||
- `Login_when_ldap_throws_returns_503` — handler-returned, unaffected
|
||||
- `Ping_anonymous_returns_401` — handler-returned, unaffected
|
||||
- `Ping_after_cookie_login_returns_200` — uses HttpClient cookie container, picks up renamed cookie automatically
|
||||
- `Login_with_cookie_credentials_returns_204_and_sets_cookie` — needs one assertion update (cookie name)
|
||||
|
||||
### Tests added (3 new)
|
||||
|
||||
- `Root_anonymous_browser_GET_redirects_to_login` — asserts 302 + `Location` contains `/login` + `ReturnUrl`
|
||||
- `Root_anonymous_ajax_GET_returns_401` — `X-Requested-With: XMLHttpRequest` → 401, no `Location`
|
||||
(the originally planned `Root_anonymous_json_GET_returns_401` was dropped — see Section 3 framework-reality note above)
|
||||
|
||||
### Removed/orphaned tests
|
||||
|
||||
None expected. The explore phase found no test depending on `ConfigureJwtBearerFromTokenService` or the `WWW-Authenticate: Bearer` response. Grep at plan-write time to confirm.
|
||||
|
||||
### Manual smoke (docker-dev stack)
|
||||
|
||||
1. `http://localhost:9200/` anonymously → expect 302 to `/login?ReturnUrl=%2F` (was: Chrome error page)
|
||||
2. Sign in via the form
|
||||
3. `http://localhost:9200/` authenticated → expect Razor dashboard
|
||||
4. DevTools → Application → Cookies → confirm `ZB.MOM.WW.OtOpcUa.Auth`
|
||||
5. `curl -i http://localhost:9200/` → `302 Found`, Location: `/login?ReturnUrl=%2F`
|
||||
6. `curl -i -H "Accept: application/json" http://localhost:9200/` → `401 Unauthorized`
|
||||
|
||||
### Verification gates at PR time
|
||||
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — zero new errors (pre-existing 12 unchanged)
|
||||
- `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/` — all green
|
||||
- `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.AdminUI.Tests/` — all green
|
||||
- Manual Chrome smoke above passes
|
||||
|
||||
---
|
||||
|
||||
## 6. Sequencing (for plan-writing)
|
||||
|
||||
Single-PR feature, but split into reviewable phases:
|
||||
|
||||
1. **Phase 1 — Options class.** Extend `OtOpcUaCookieOptions` with `RequireHttpsCookie` and new `Name` default. Tests unaffected.
|
||||
2. **Phase 2 — Wiring rewrite.** Edit `ServiceCollectionExtensions.cs`: drop JwtBearer, drop event overrides, add `LoginPath`/`LogoutPath`, add PostConfigure consumption of `OtOpcUaCookieOptions`. Update the one existing test assertion. Build + existing Security.Tests green.
|
||||
3. **Phase 3 — New challenge tests.** Add the 3 new redirect/401 tests.
|
||||
4. **Phase 4 — Package cleanup.** Remove `Microsoft.AspNetCore.Authentication.JwtBearer` from csproj if grep confirms no remaining consumer.
|
||||
5. **Phase 5 — Manual smoke + commit.** Restart admin-a/admin-b in docker-dev; verify in Chrome.
|
||||
|
||||
---
|
||||
|
||||
## Decisions table
|
||||
|
||||
| # | Decision | Rationale |
|
||||
|---|---|---|
|
||||
| 1 | Drop JwtBearer server-side scheme | No in-repo consumer; brought non-redirect 401 + `WWW-Authenticate: Bearer` to browser GETs |
|
||||
| 2 | Keep `JwtTokenService` + `/auth/token` | Token-as-cookie-payload is load-bearing for Blazor; `/auth/token` matches ScadaBridge surface |
|
||||
| 3 | Rename cookie `OtOpcUa.Auth` → `ZB.MOM.WW.OtOpcUa.Auth` | Naming parity with ScadaBridge; one-time forced sign-out acceptable |
|
||||
| 4 | Externalize via existing `OtOpcUaCookieOptions` + PostConfigure | Mirrors ScadaBridge pattern; fixes pre-existing bug where options class was bound but ignored |
|
||||
| 5 | Drop both `OnRedirectToLogin` and `OnRedirectToAccessDenied` overrides | Restores framework's browser-vs-AJAX heuristic; ScadaBridge does the same |
|
||||
| 6 | Set `LoginPath = "/login"`, `LogoutPath = "/auth/logout"` | Required for the framework's default redirect to work |
|
||||
| 7 | Accept 404 on `/Account/AccessDenied` for v1 | Matches ScadaBridge; rare path; follow-up to add minimal page |
|
||||
| 8 | Warning-log when `RequireHttpsCookie = false` | Audible misconfig signal; same as ScadaBridge |
|
||||
@@ -0,0 +1,652 @@
|
||||
# Auth/login alignment with ScadaBridge — implementation plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use `superpowers-extended-cc:executing-plans` or `superpowers-extended-cc:subagent-driven-development` to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Match ScadaBridge's single-Cookie auth pattern: drop the unused JwtBearer parallel scheme, restore the framework's default browser-vs-AJAX challenge heuristic, and externalize cookie config through the existing-but-unused `OtOpcUaCookieOptions`.
|
||||
|
||||
**Architecture:** Cookie-only auth. `JwtTokenService` keeps minting JWTs as the cookie payload (Blazor circuit hydration depends on it). Cookie name + idle timeout + HTTPS policy flow through `OtOpcUaCookieOptions` via a `Configure<IOptions<OtOpcUaCookieOptions>, ILoggerFactory>` PostConfigure step. Endpoint surface (`/auth/login`, `/auth/logout`, `/auth/ping`, `/auth/token`) unchanged.
|
||||
|
||||
**Tech stack:** .NET 10 / ASP.NET Core / `Microsoft.AspNetCore.Authentication.Cookies` / xUnit v3 + Shouldly / `Microsoft.AspNetCore.TestHost.TestServer`.
|
||||
|
||||
**Design doc:** `docs/plans/2026-05-29-auth-alignment-design.md` (commit `bc4fce5`). Each task below cites the design section it implements.
|
||||
|
||||
---
|
||||
|
||||
## Sequencing
|
||||
|
||||
```
|
||||
Task 1 (Options class)
|
||||
└─► Task 2 (Wiring rewrite + test assertion update)
|
||||
├─► Task 3 (3 new challenge tests)
|
||||
└─► Task 4 (csproj cleanup)
|
||||
└─► Task 5 (manual smoke + final commit)
|
||||
```
|
||||
|
||||
Tasks 3 and 4 are parallelizable (disjoint files).
|
||||
|
||||
---
|
||||
|
||||
## Task 1 — Extend `OtOpcUaCookieOptions`
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** none (Task 2 depends on this)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Security/CookieOptions.cs`
|
||||
|
||||
**Implements design:** Section 1 (Architecture, "Cookie config — externalized") + Section 2 (Components, file table row 1).
|
||||
|
||||
### Step 1: Replace file contents
|
||||
|
||||
Current file (12 lines):
|
||||
```csharp
|
||||
namespace ZB.MOM.WW.OtOpcUa.Security;
|
||||
|
||||
public sealed class OtOpcUaCookieOptions
|
||||
{
|
||||
public const string SectionName = "Security:Cookie";
|
||||
|
||||
/// <summary>Gets or sets the cookie name.</summary>
|
||||
public string Name { get; set; } = "OtOpcUa.Auth";
|
||||
|
||||
/// <summary>Idle sliding window, in minutes (default 30).</summary>
|
||||
public int ExpiryMinutes { get; set; } = 30;
|
||||
}
|
||||
```
|
||||
|
||||
Replace with:
|
||||
```csharp
|
||||
namespace ZB.MOM.WW.OtOpcUa.Security;
|
||||
|
||||
/// <summary>
|
||||
/// Auth-cookie configuration bound from <c>Security:Cookie</c>. Consumed by a
|
||||
/// <c>Configure<IOptions<OtOpcUaCookieOptions>, ILoggerFactory></c> step inside
|
||||
/// <c>AddOtOpcUaAuth</c> that copies the values onto <c>CookieAuthenticationOptions</c>.
|
||||
/// </summary>
|
||||
public sealed class OtOpcUaCookieOptions
|
||||
{
|
||||
/// <summary>Configuration section name (<c>Security:Cookie</c>).</summary>
|
||||
public const string SectionName = "Security:Cookie";
|
||||
|
||||
/// <summary>
|
||||
/// Auth cookie name. Default uses the <c>ZB.MOM.WW</c> convention; mirrors ScadaBridge's
|
||||
/// <c>ZB.MOM.WW.ScadaBridge.Auth</c>. Changing this invalidates existing sessions on next
|
||||
/// deploy.
|
||||
/// </summary>
|
||||
public string Name { get; set; } = "ZB.MOM.WW.OtOpcUa.Auth";
|
||||
|
||||
/// <summary>Idle sliding-window length in minutes (default 30).</summary>
|
||||
public int ExpiryMinutes { get; set; } = 30;
|
||||
|
||||
/// <summary>
|
||||
/// Require HTTPS for the auth cookie. Default <c>true</c>: cookie is marked
|
||||
/// <c>SecurePolicy = Always</c>. Set to <c>false</c> ONLY for local dev stacks running
|
||||
/// plain HTTP — emits a startup Warning when disabled so the misconfiguration is
|
||||
/// audible.
|
||||
/// </summary>
|
||||
public bool RequireHttpsCookie { get; set; } = true;
|
||||
}
|
||||
```
|
||||
|
||||
### Step 2: Build
|
||||
|
||||
Run:
|
||||
```bash
|
||||
cd /Users/dohertj2/Desktop/OtOpcUa
|
||||
dotnet build src/Server/ZB.MOM.WW.OtOpcUa.Security/
|
||||
```
|
||||
Expected: 0 errors, 0 warnings.
|
||||
|
||||
### Step 3: Commit
|
||||
|
||||
```bash
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa add src/Server/ZB.MOM.WW.OtOpcUa.Security/CookieOptions.cs
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa commit -m "feat(security): extend OtOpcUaCookieOptions with RequireHttpsCookie + ZB.MOM.WW cookie name default"
|
||||
```
|
||||
|
||||
### Output report
|
||||
|
||||
- Lines before / after
|
||||
- Build clean
|
||||
- Commit SHA
|
||||
|
||||
### Self-review checklist
|
||||
|
||||
- [ ] `Name` default is `"ZB.MOM.WW.OtOpcUa.Auth"` (NOT `"OtOpcUa.Auth"`)
|
||||
- [ ] `RequireHttpsCookie` field added with default `true` and XML doc explaining the dev-only opt-out
|
||||
- [ ] `ExpiryMinutes` default unchanged at 30
|
||||
- [ ] `SectionName` constant unchanged
|
||||
- [ ] Build clean
|
||||
|
||||
---
|
||||
|
||||
## Task 2 — Rewrite auth wiring in `ServiceCollectionExtensions.cs`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (Tasks 3 and 4 depend on this)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`
|
||||
- Modify: `tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/AuthEndpointsIntegrationTests.cs:93`
|
||||
|
||||
**Implements design:** Section 1 + Section 2 file table rows 2 + 3.
|
||||
|
||||
### Step 1: Read current file
|
||||
|
||||
```bash
|
||||
cat /Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs
|
||||
```
|
||||
|
||||
Current shape (relevant excerpt):
|
||||
- `using Microsoft.AspNetCore.Authentication.JwtBearer;` at top
|
||||
- `internal sealed class ConfigureJwtBearerFromTokenService(JwtTokenService tokenService) : IPostConfigureOptions<JwtBearerOptions>` class (lines ~15-35)
|
||||
- `.AddCookie(o => { ... })` with `OnRedirectToLogin` / `OnRedirectToAccessDenied` overrides
|
||||
- `.AddJwtBearer(JwtBearerDefaults.AuthenticationScheme, _ => { })` chained after AddCookie
|
||||
- `services.AddSingleton<IPostConfigureOptions<JwtBearerOptions>, ConfigureJwtBearerFromTokenService>()` after the AddAuthentication block
|
||||
- `FallbackPolicy` builder takes both Cookie + JwtBearer schemes
|
||||
|
||||
### Step 2: Replace the file with the new shape
|
||||
|
||||
The full target file:
|
||||
|
||||
```csharp
|
||||
using Microsoft.AspNetCore.Authentication.Cookies;
|
||||
using Microsoft.AspNetCore.DataProtection;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Security.Jwt;
|
||||
using ZB.MOM.WW.OtOpcUa.Security.Ldap;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Security;
|
||||
|
||||
/// <summary>
|
||||
/// DI registration for OtOpcUa auth. Single Cookie scheme (the JWT lives inside the
|
||||
/// cookie as its credential payload); no JwtBearer parallel scheme. Matches ScadaBridge
|
||||
/// structurally — see <c>docs/plans/2026-05-29-auth-alignment-design.md</c>.
|
||||
/// </summary>
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>Wires cookie authentication, DataProtection key persistence to ConfigDb,
|
||||
/// LDAP services, and the LDAP-backed JwtTokenService. Browser flows redirect to
|
||||
/// <c>/login</c>; AJAX/JSON callers receive 401 (handled by the framework's default
|
||||
/// challenge heuristic).</summary>
|
||||
/// <param name="services">The service collection.</param>
|
||||
/// <param name="configuration">The application configuration root.</param>
|
||||
public static IServiceCollection AddOtOpcUaAuth(this IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
services.AddOptions<JwtOptions>().Bind(configuration.GetSection(JwtOptions.SectionName));
|
||||
services.AddOptions<OtOpcUaCookieOptions>().Bind(configuration.GetSection(OtOpcUaCookieOptions.SectionName));
|
||||
services.AddOptions<LdapOptions>().Bind(configuration.GetSection(LdapOptions.SectionName));
|
||||
|
||||
services.AddSingleton<JwtTokenService>();
|
||||
// Singleton — LdapAuthService is stateless (creates an LdapConnection per call) and
|
||||
// must be consumable by the Singleton LdapOpcUaUserAuthenticator on driver-role nodes.
|
||||
services.AddSingleton<ILdapAuthService, LdapAuthService>();
|
||||
|
||||
services.AddDataProtection()
|
||||
.PersistKeysToDbContext<OtOpcUaConfigDbContext>()
|
||||
.SetApplicationName("OtOpcUa");
|
||||
|
||||
services.AddAuthentication(CookieAuthenticationDefaults.AuthenticationScheme)
|
||||
.AddCookie(o =>
|
||||
{
|
||||
// Static fields only — Name / ExpireTimeSpan / SecurePolicy / SlidingExpiration
|
||||
// are bound from OtOpcUaCookieOptions in the PostConfigure block below.
|
||||
o.LoginPath = "/login";
|
||||
o.LogoutPath = "/auth/logout";
|
||||
o.Cookie.HttpOnly = true;
|
||||
o.Cookie.SameSite = SameSiteMode.Strict;
|
||||
// No OnRedirectToLogin / OnRedirectToAccessDenied overrides — let the framework's
|
||||
// built-in IsAjaxRequest heuristic do its thing (302 for browsers, 401 for AJAX).
|
||||
});
|
||||
|
||||
// Externalised cookie config — mirrors ScadaBridge's PostConfigure pattern. Fixes a
|
||||
// pre-existing latent bug where OtOpcUaCookieOptions was bound but ignored.
|
||||
services.AddOptions<CookieAuthenticationOptions>(CookieAuthenticationDefaults.AuthenticationScheme)
|
||||
.Configure<IOptions<OtOpcUaCookieOptions>, ILoggerFactory>((cookieOpts, ourOpts, lf) =>
|
||||
{
|
||||
var v = ourOpts.Value;
|
||||
cookieOpts.Cookie.Name = v.Name;
|
||||
cookieOpts.ExpireTimeSpan = TimeSpan.FromMinutes(v.ExpiryMinutes);
|
||||
cookieOpts.SlidingExpiration = true;
|
||||
cookieOpts.Cookie.SecurePolicy = v.RequireHttpsCookie
|
||||
? CookieSecurePolicy.Always
|
||||
: CookieSecurePolicy.SameAsRequest;
|
||||
|
||||
if (!v.RequireHttpsCookie)
|
||||
{
|
||||
lf.CreateLogger("ZB.MOM.WW.OtOpcUa.Security").LogWarning(
|
||||
"Security:Cookie:RequireHttpsCookie is DISABLED — auth cookie SecurePolicy is " +
|
||||
"SameAsRequest. The cookie-embedded JWT will travel in cleartext over plain HTTP. " +
|
||||
"Intended for local dev only — set Security:Cookie:RequireHttpsCookie=true in production.");
|
||||
}
|
||||
});
|
||||
|
||||
services.AddAuthorization(o =>
|
||||
{
|
||||
o.FallbackPolicy = new Microsoft.AspNetCore.Authorization.AuthorizationPolicyBuilder(
|
||||
CookieAuthenticationDefaults.AuthenticationScheme)
|
||||
.RequireAuthenticatedUser()
|
||||
.Build();
|
||||
|
||||
// DriverOperator: may issue Reconnect/Restart commands against live driver instances
|
||||
// from the Admin UI DriverStatusPanel. Map LDAP group → role via GroupToRole in
|
||||
// appsettings (e.g. "ot-driver-operator": "DriverOperator").
|
||||
o.AddPolicy("DriverOperator", policy =>
|
||||
policy.RequireRole("DriverOperator", "FleetAdmin"));
|
||||
});
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
What's gone (vs. the original):
|
||||
- `using Microsoft.AspNetCore.Authentication.JwtBearer;`
|
||||
- `ConfigureJwtBearerFromTokenService` internal class entirely
|
||||
- `.AddJwtBearer(...)` chain after `.AddCookie(...)`
|
||||
- `services.AddSingleton<IPostConfigureOptions<JwtBearerOptions>, ConfigureJwtBearerFromTokenService>();`
|
||||
- `OnRedirectToLogin` / `OnRedirectToAccessDenied` event overrides
|
||||
- Hardcoded `o.Cookie.Name = "OtOpcUa.Auth"`, `o.SlidingExpiration = true`, `o.ExpireTimeSpan = TimeSpan.FromMinutes(30)`, `o.Cookie.SecurePolicy = CookieSecurePolicy.SameAsRequest`
|
||||
- `JwtBearerDefaults.AuthenticationScheme` from the `FallbackPolicy` builder
|
||||
|
||||
What's added:
|
||||
- `using Microsoft.Extensions.Logging;`
|
||||
- `o.LoginPath = "/login"`, `o.LogoutPath = "/auth/logout"` inside `.AddCookie(...)`
|
||||
- The `services.AddOptions<CookieAuthenticationOptions>(...).Configure<...>(...)` PostConfigure block
|
||||
|
||||
### Step 3: Update the one existing test assertion
|
||||
|
||||
In `tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/AuthEndpointsIntegrationTests.cs` around line 93:
|
||||
|
||||
```csharp
|
||||
// before
|
||||
response.Headers.GetValues("Set-Cookie").ShouldContain(c => c.StartsWith("OtOpcUa.Auth="));
|
||||
// after
|
||||
response.Headers.GetValues("Set-Cookie").ShouldContain(c => c.StartsWith("ZB.MOM.WW.OtOpcUa.Auth="));
|
||||
```
|
||||
|
||||
### Step 4: Build + run security tests
|
||||
|
||||
```bash
|
||||
cd /Users/dohertj2/Desktop/OtOpcUa
|
||||
dotnet build src/Server/ZB.MOM.WW.OtOpcUa.Security/
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/
|
||||
```
|
||||
|
||||
Expected: build clean; all Security.Tests pass (the existing 5 AuthEndpointsIntegrationTests + JwtTokenServiceTests + LdapHelperTests + RoleMapperTests).
|
||||
|
||||
### Step 5: Commit
|
||||
|
||||
```bash
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa add \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/AuthEndpointsIntegrationTests.cs
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa commit -m "$(cat <<'EOF'
|
||||
refactor(security): drop JwtBearer parallel scheme, externalize cookie config
|
||||
|
||||
Single Cookie auth scheme; framework default challenge restores 302 → /login
|
||||
for browsers + 401 for AJAX. OtOpcUaCookieOptions now flows through to
|
||||
CookieAuthenticationOptions via PostConfigure (fixes a latent bug where the
|
||||
options class was bound but ignored). Cookie name moves to
|
||||
ZB.MOM.WW.OtOpcUa.Auth; existing sessions get a one-time forced sign-out.
|
||||
EOF
|
||||
)"
|
||||
```
|
||||
|
||||
### Output report
|
||||
|
||||
- Net LOC change (additions / deletions)
|
||||
- Build clean
|
||||
- Test count run / passed
|
||||
- Commit SHA
|
||||
- Anything unexpected
|
||||
|
||||
### Self-review checklist
|
||||
|
||||
- [ ] `using Microsoft.AspNetCore.Authentication.JwtBearer;` removed
|
||||
- [ ] `ConfigureJwtBearerFromTokenService` class deleted
|
||||
- [ ] `.AddJwtBearer(...)` call deleted
|
||||
- [ ] `IPostConfigureOptions<JwtBearerOptions>` singleton registration deleted
|
||||
- [ ] `OnRedirectToLogin` and `OnRedirectToAccessDenied` overrides deleted
|
||||
- [ ] `LoginPath = "/login"` and `LogoutPath = "/auth/logout"` added inside `.AddCookie(...)`
|
||||
- [ ] PostConfigure block added consuming `OtOpcUaCookieOptions`
|
||||
- [ ] Warning log fires when `RequireHttpsCookie == false`
|
||||
- [ ] `FallbackPolicy` now takes only `CookieAuthenticationDefaults.AuthenticationScheme`
|
||||
- [ ] `DriverOperator` policy unchanged
|
||||
- [ ] Test assertion updated to `ZB.MOM.WW.OtOpcUa.Auth=`
|
||||
- [ ] `dotnet test tests/Server/.../Security.Tests/` all green
|
||||
|
||||
---
|
||||
|
||||
## Task 3 — Add browser-vs-AJAX challenge tests
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 4
|
||||
|
||||
**Files:**
|
||||
- Modify: `tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/AuthEndpointsIntegrationTests.cs` (append 3 new test methods + 1 helper)
|
||||
|
||||
**Implements design:** Section 5 "Tests added" + Section 4 "Auth challenge for unknown content type".
|
||||
|
||||
### Context for the implementer
|
||||
|
||||
`AuthEndpointsIntegrationTests` is `IAsyncLifetime`-backed and stands up a `TestServer` with `MapOtOpcUaAuth()` mounted (line 66). The `web.UseEndpoints(e => e.MapOtOpcUaAuth())` wires ONLY the four `/auth/*` endpoints — there is NO root `MapGet("/", ...)` registered. So an anonymous GET to `/` hits the routing pipeline, falls through to a 404 BEFORE auth middleware even challenges.
|
||||
|
||||
**The test harness needs a protected root endpoint.** Add one in `InitializeAsync` inside the `web.UseEndpoints(...)` callback. Then the 3 new tests will exercise the cookie scheme's challenge for that protected route.
|
||||
|
||||
### Step 1: Modify the test host setup
|
||||
|
||||
In `AuthEndpointsIntegrationTests.cs`, change `web.UseEndpoints(...)` (around line 66) from:
|
||||
```csharp
|
||||
app.UseEndpoints(e => e.MapOtOpcUaAuth());
|
||||
```
|
||||
to:
|
||||
```csharp
|
||||
app.UseEndpoints(e =>
|
||||
{
|
||||
e.MapOtOpcUaAuth();
|
||||
// Protected root used by AuthChallengeTests below — exercises the cookie
|
||||
// scheme's challenge heuristic without depending on the full Razor host.
|
||||
e.MapGet("/", () => Results.Ok("authenticated")).RequireAuthorization();
|
||||
});
|
||||
```
|
||||
|
||||
### Step 2: Add the three new test methods
|
||||
|
||||
Append at the bottom of the class (before the closing brace), keeping the file's existing summary style and using `TestContext.Current.CancellationToken` via the existing `Ct` property:
|
||||
|
||||
```csharp
|
||||
/// <summary>Anonymous browser GET of a protected route redirects to /login with a ReturnUrl.</summary>
|
||||
[Fact]
|
||||
public async Task Root_anonymous_browser_GET_redirects_to_login()
|
||||
{
|
||||
var client = NewClientNoRedirect();
|
||||
var req = new HttpRequestMessage(HttpMethod.Get, "/");
|
||||
req.Headers.Accept.ParseAdd("text/html");
|
||||
var resp = await client.SendAsync(req, Ct);
|
||||
|
||||
resp.StatusCode.ShouldBe(HttpStatusCode.Found);
|
||||
resp.Headers.Location.ShouldNotBeNull();
|
||||
resp.Headers.Location!.OriginalString.ShouldContain("/login");
|
||||
resp.Headers.Location.OriginalString.ShouldContain("ReturnUrl");
|
||||
}
|
||||
|
||||
/// <summary>Anonymous AJAX GET of a protected route returns 401 with no Location.</summary>
|
||||
[Fact]
|
||||
public async Task Root_anonymous_ajax_GET_returns_401()
|
||||
{
|
||||
var client = NewClientNoRedirect();
|
||||
var req = new HttpRequestMessage(HttpMethod.Get, "/");
|
||||
req.Headers.Add("X-Requested-With", "XMLHttpRequest");
|
||||
var resp = await client.SendAsync(req, Ct);
|
||||
|
||||
resp.StatusCode.ShouldBe(HttpStatusCode.Unauthorized);
|
||||
resp.Headers.Location.ShouldBeNull();
|
||||
}
|
||||
|
||||
/// <summary>Anonymous JSON GET of a protected route returns 401.</summary>
|
||||
[Fact]
|
||||
public async Task Root_anonymous_json_GET_returns_401()
|
||||
{
|
||||
var client = NewClientNoRedirect();
|
||||
var req = new HttpRequestMessage(HttpMethod.Get, "/");
|
||||
req.Headers.Accept.ParseAdd("application/json");
|
||||
var resp = await client.SendAsync(req, Ct);
|
||||
|
||||
resp.StatusCode.ShouldBe(HttpStatusCode.Unauthorized);
|
||||
}
|
||||
```
|
||||
|
||||
### Step 3: Add the no-redirect client helper
|
||||
|
||||
Right next to the existing `NewClient()` method (line 82):
|
||||
|
||||
```csharp
|
||||
/// <summary>Creates a TestServer-backed HttpClient that does NOT auto-follow redirects.
|
||||
/// Used by challenge tests so we can assert on the 302 / Location directly.</summary>
|
||||
private HttpClient NewClientNoRedirect() => new(_server.CreateHandler())
|
||||
{
|
||||
BaseAddress = _server.BaseAddress,
|
||||
};
|
||||
```
|
||||
|
||||
### Step 4: Run the tests
|
||||
|
||||
```bash
|
||||
cd /Users/dohertj2/Desktop/OtOpcUa
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/
|
||||
```
|
||||
|
||||
Expected: existing 5 tests still pass + 3 new tests pass = 8+ total green.
|
||||
|
||||
**If `Root_anonymous_browser_GET_redirects_to_login` returns 200 instead of 302**: HttpClient is still auto-following redirects. Two fixes to try in order:
|
||||
1. Confirm `NewClientNoRedirect` uses `_server.CreateHandler()` (not `CreateClient()`).
|
||||
2. If still wrong, swap to: `var handler = new HttpClientHandler { AllowAutoRedirect = false };` — but TestServer doesn't expose HttpClientHandler directly. The `CreateHandler()` path SHOULD return a non-redirecting handler; if it doesn't, the implementation may need a `DelegatingHandler` wrapper.
|
||||
|
||||
**If `Root_anonymous_browser_GET_redirects_to_login` returns 401 instead of 302**: the cookie scheme isn't classifying `Accept: text/html` as a browser. Inspect Task 2's changes — `OnRedirectToLogin` may not have been fully removed, OR `LoginPath` was not set, OR an `Accept` parsing issue. Look at the response body — if it's empty + 401, the JwtBearer scheme or the override is still in play.
|
||||
|
||||
### Step 5: Commit
|
||||
|
||||
```bash
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa add tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/AuthEndpointsIntegrationTests.cs
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa commit -m "test(security): add browser-vs-AJAX challenge tests for root path"
|
||||
```
|
||||
|
||||
### Output report
|
||||
|
||||
- 3 new tests + 1 helper + modified InitializeAsync
|
||||
- Build clean
|
||||
- Test count: existing N + 3 new = N+3 green
|
||||
- Commit SHA
|
||||
- Anything unexpected (e.g. redirect-following behavior of `_server.CreateHandler()`)
|
||||
|
||||
### Self-review checklist
|
||||
|
||||
- [ ] `MapGet("/", ...).RequireAuthorization()` added inside `web.UseEndpoints(...)`
|
||||
- [ ] `NewClientNoRedirect()` helper added
|
||||
- [ ] 3 new `[Fact]` methods added with `TestContext.Current.CancellationToken` via the `Ct` property
|
||||
- [ ] Each test asserts on the exact status + Location header (or absence)
|
||||
- [ ] All tests green
|
||||
- [ ] Existing 5 tests still pass
|
||||
|
||||
---
|
||||
|
||||
## Task 4 — Remove `Microsoft.AspNetCore.Authentication.JwtBearer` package reference
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** Task 3
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj` (delete one line)
|
||||
- Verify: `Directory.Packages.props` — leave the `<PackageVersion Include="Microsoft.AspNetCore.Authentication.JwtBearer" ... />` entry in place (other projects may consume it).
|
||||
|
||||
**Implements design:** Section 2 "Package references" + Section 6 phase 4.
|
||||
|
||||
### Step 1: Confirm no remaining consumer in the Security project
|
||||
|
||||
```bash
|
||||
grep -rn "Microsoft\.AspNetCore\.Authentication\.JwtBearer\|JwtBearer" \
|
||||
/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Security/ \
|
||||
--include="*.cs"
|
||||
```
|
||||
|
||||
Expected: zero matches. (Task 2 removed all uses.) If there are matches, STOP and report — Task 2 was incomplete.
|
||||
|
||||
### Step 2: Remove the PackageReference
|
||||
|
||||
In `src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj`, find this line (currently around line 13):
|
||||
```xml
|
||||
<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer"/>
|
||||
```
|
||||
Delete it. **Keep** these:
|
||||
```xml
|
||||
<PackageReference Include="Microsoft.IdentityModel.Tokens"/>
|
||||
<PackageReference Include="System.IdentityModel.Tokens.Jwt"/>
|
||||
```
|
||||
(`JwtTokenService` consumes those for `TokenValidationParameters` + JWT creation respectively — they're not from the JwtBearer authentication package.)
|
||||
|
||||
### Step 3: Check whether ANY other project still references the package
|
||||
|
||||
```bash
|
||||
grep -rn "Microsoft\.AspNetCore\.Authentication\.JwtBearer" \
|
||||
/Users/dohertj2/Desktop/OtOpcUa/src/ /Users/dohertj2/Desktop/OtOpcUa/tests/ \
|
||||
--include="*.csproj"
|
||||
```
|
||||
|
||||
If zero results: also remove the `<PackageVersion Include="Microsoft.AspNetCore.Authentication.JwtBearer" ...>` line from `Directory.Packages.props` (search for it). If one or more other projects still reference it, leave `Directory.Packages.props` alone.
|
||||
|
||||
### Step 4: Restore + build
|
||||
|
||||
```bash
|
||||
cd /Users/dohertj2/Desktop/OtOpcUa
|
||||
dotnet restore src/Server/ZB.MOM.WW.OtOpcUa.Security/
|
||||
dotnet build src/Server/ZB.MOM.WW.OtOpcUa.Security/
|
||||
dotnet build ZB.MOM.WW.OtOpcUa.slnx
|
||||
```
|
||||
|
||||
Expected: 0 NEW errors. The known pre-existing 12 errors (OpcUaServer.Tests + Runtime.Tests + AbLegacy.Cli + S7.Cli) remain unchanged.
|
||||
|
||||
### Step 5: Commit
|
||||
|
||||
```bash
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa add \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj \
|
||||
Directory.Packages.props # only if you also removed it from Directory.Packages.props
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa commit -m "chore(security): drop Microsoft.AspNetCore.Authentication.JwtBearer (unused)"
|
||||
```
|
||||
|
||||
If only the csproj changed: omit `Directory.Packages.props` from the add.
|
||||
|
||||
### Output report
|
||||
|
||||
- Was Directory.Packages.props also touched? Justify based on whether other projects still reference the package.
|
||||
- Build clean (0 new errors)
|
||||
- Commit SHA
|
||||
|
||||
### Self-review checklist
|
||||
|
||||
- [ ] Confirmed zero `Microsoft.AspNetCore.Authentication.JwtBearer` or `JwtBearer` matches in `src/Server/ZB.MOM.WW.OtOpcUa.Security/**/*.cs` before deletion
|
||||
- [ ] PackageReference removed from Security.csproj
|
||||
- [ ] `Microsoft.IdentityModel.Tokens` and `System.IdentityModel.Tokens.Jwt` kept
|
||||
- [ ] Directory.Packages.props touched ONLY if no other project consumes the package
|
||||
- [ ] Full solution build adds zero new errors
|
||||
|
||||
---
|
||||
|
||||
## Task 5 — Manual smoke + final commit
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Files:** none (verification + optional cleanup commit)
|
||||
|
||||
**Implements design:** Section 5 "Manual smoke" + Section 6 phase 5.
|
||||
|
||||
### Step 1: Restart the docker-dev cluster
|
||||
|
||||
The admin nodes need to pick up the new `Microsoft.AspNetCore.TestHost`-side code path AND the new cookie name. Since the in-cluster admin processes run a prior build, force a rebuild + recreate:
|
||||
|
||||
```bash
|
||||
cd /Users/dohertj2/Desktop/OtOpcUa
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build admin-a admin-b
|
||||
```
|
||||
|
||||
Wait ~15 s for warm-up. Then:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml ps admin-a admin-b
|
||||
```
|
||||
|
||||
Both should show `Up` and `(healthy)` (or `Up` if no healthcheck).
|
||||
|
||||
### Step 2: curl smoke
|
||||
|
||||
```bash
|
||||
# Anonymous browser-shaped GET → 302 to /login with ReturnUrl
|
||||
curl -i -H "Accept: text/html" http://localhost:9200/ 2>&1 | head -12
|
||||
# Expected: HTTP/1.1 302 Found, Location: /login?ReturnUrl=%2F
|
||||
|
||||
# Anonymous AJAX GET → 401
|
||||
curl -i -H "X-Requested-With: XMLHttpRequest" http://localhost:9200/ 2>&1 | head -8
|
||||
# Expected: HTTP/1.1 401 Unauthorized
|
||||
|
||||
# Anonymous JSON GET → 401
|
||||
curl -i -H "Accept: application/json" http://localhost:9200/ 2>&1 | head -8
|
||||
# Expected: HTTP/1.1 401 Unauthorized
|
||||
|
||||
# Login form → 302 with Set-Cookie ZB.MOM.WW.OtOpcUa.Auth
|
||||
curl -i -X POST -d "username=alice&password=alice" \
|
||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||
http://localhost:9200/auth/login 2>&1 | head -15
|
||||
# Expected: HTTP/1.1 302 Found, Set-Cookie: ZB.MOM.WW.OtOpcUa.Auth=... (the test stub user may differ — check docker-compose's GLAuth seed for a valid LDAP creds pair)
|
||||
```
|
||||
|
||||
### Step 3: Chrome smoke (via the macbook browser instance from earlier in the session)
|
||||
|
||||
1. Open `http://localhost:9200/` — should redirect to `/login?ReturnUrl=%2F` (not Chrome's error page)
|
||||
2. Sign in via the form
|
||||
3. DevTools → Application → Cookies → confirm cookie name is `ZB.MOM.WW.OtOpcUa.Auth`
|
||||
4. Navigate to `http://localhost:9200/` again — should render the AdminUI dashboard
|
||||
5. Click logout → confirm redirect back to `/login`
|
||||
|
||||
### Step 4: Optional CLAUDE.md update
|
||||
|
||||
If `CLAUDE.md` mentions the old `OtOpcUa.Auth` cookie name anywhere, update to the new `ZB.MOM.WW.OtOpcUa.Auth`. Run:
|
||||
|
||||
```bash
|
||||
grep -n "OtOpcUa\.Auth" /Users/dohertj2/Desktop/OtOpcUa/CLAUDE.md
|
||||
```
|
||||
|
||||
If matches: update them, otherwise skip.
|
||||
|
||||
### Step 5: Final commit (only if Step 4 changed CLAUDE.md)
|
||||
|
||||
```bash
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa add CLAUDE.md
|
||||
git -C /Users/dohertj2/Desktop/OtOpcUa commit -m "docs: update cookie name reference in CLAUDE.md"
|
||||
```
|
||||
|
||||
### Output report
|
||||
|
||||
- All 4 curl smoke checks passed?
|
||||
- Chrome smoke passed?
|
||||
- CLAUDE.md changed?
|
||||
- Final SHA on master (if any docs commit)
|
||||
- Commit count since this plan started (vs `bc4fce5`)
|
||||
|
||||
### Self-review checklist
|
||||
|
||||
- [ ] `docker compose up -d --build admin-a admin-b` succeeded
|
||||
- [ ] All 4 curl smoke checks return expected status codes
|
||||
- [ ] Chrome smoke shows redirect to `/login`, then dashboard after auth
|
||||
- [ ] Cookie name in DevTools matches `ZB.MOM.WW.OtOpcUa.Auth`
|
||||
- [ ] No new commits left uncommitted in the working tree
|
||||
|
||||
---
|
||||
|
||||
## Verification gates (apply at end of every task)
|
||||
|
||||
- `dotnet build src/Server/ZB.MOM.WW.OtOpcUa.Security/` — 0 errors
|
||||
- `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/` — all green (existing + new)
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — no NEW errors beyond the 12 pre-existing
|
||||
- No untracked files staged accidentally (especially `sql_login.txt`, `pki/`, doc-fix artifacts)
|
||||
|
||||
---
|
||||
|
||||
## Risk hot-spots for reviewers
|
||||
|
||||
1. **TestServer's no-redirect HttpClient.** The plan assumes `new HttpClient(_server.CreateHandler()) { BaseAddress = _server.BaseAddress }` does NOT auto-follow redirects. If it does, the `Root_anonymous_browser_GET_redirects_to_login` test fails with 200 instead of 302. Fix path documented in Task 3 Step 4.
|
||||
2. **Framework default of `Accept: */*` → 302.** Curl's default Accept header is `*/*`, which the framework classifies as browser → 302. Documented behavior, mirrors ScadaBridge; reviewers should not flag the smoke step that uses `Accept: text/html` as redundant — it's the explicit "browser" assertion.
|
||||
3. **Cookie rename invalidates sessions.** The deploy effectively logs every currently-signed-in user out. Document in commit body; the cluster was just restarted on the new API key anyway, so the timing is opportune.
|
||||
4. **`Directory.Packages.props` change is conditional.** Don't touch it if other projects still consume the JwtBearer package. Task 4 has explicit grep guard.
|
||||
5. **`/Account/AccessDenied` 404.** Authenticated users hitting a `DriverOperator`-only route now get a generic 404 page instead of a clean access-denied message. Documented design choice; follow-up to add a Razor page if UX feedback demands it.
|
||||
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-29-auth-alignment-plan.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: Extend OtOpcUaCookieOptions", "status": "pending"},
|
||||
{"id": 2, "subject": "Task 2: Rewrite auth wiring + update cookie-name assertion", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: Add browser-vs-AJAX challenge tests", "status": "pending", "blockedBy": [2]},
|
||||
{"id": 4, "subject": "Task 4: Remove JwtBearer package reference", "status": "pending", "blockedBy": [2]},
|
||||
{"id": 5, "subject": "Task 5: Manual smoke + final commit", "status": "pending", "blockedBy": [3, 4]}
|
||||
],
|
||||
"lastUpdated": "2026-05-29T00:00:00Z"
|
||||
}
|
||||
@@ -583,10 +583,20 @@ language binding.
|
||||
|
||||
**Depends on:** A.1 merged (proto change live).
|
||||
|
||||
**Files** (`c:\Users\dohertj2\Desktop\mxaccessgw\clients\`):
|
||||
**Files** (`c:\Users\dohertj2\Desktop\mxaccessgw\src\` for .NET — note the sibling
|
||||
repo restructured after this plan was written; `clients/dotnet/MxGateway.Client.csproj`
|
||||
no longer exists, the proto contracts now live in
|
||||
`src/ZB.MOM.WW.MxGateway.Contracts/` under the new namespace
|
||||
`ZB.MOM.WW.MxGateway.Contracts.Proto[.Galaxy]`; the OtOpcUa driver currently
|
||||
consumes vendored binaries from the pre-restructure build — see
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/libs/README.md`):
|
||||
|
||||
1. **.NET** — codegen runs on csproj rebuild via `Grpc.Tools`; just
|
||||
rebuild `MxGateway.Client.csproj` after pulling A.1.
|
||||
1. **.NET** — codegen runs on csproj rebuild via `Grpc.Tools`; rebuild
|
||||
`src/ZB.MOM.WW.MxGateway.Contracts/ZB.MOM.WW.MxGateway.Contracts.csproj`
|
||||
after pulling A.1. (If unwinding the driver's vendored binaries onto the
|
||||
new contracts namespace as part of the alarm work, namespace-rename + a
|
||||
reimplementation of the missing `MxGatewayClient` / `MxGatewaySession`
|
||||
wrappers is also in scope.)
|
||||
2. **Python** — run `clients\python\generate-proto.ps1`; commit the
|
||||
regenerated `_pb2.py` + `_pb2_grpc.py` files under
|
||||
`clients\python\src\`.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# High-Level Requirements
|
||||
|
||||
> **Revision** — Refreshed 2026-04-19 for the OtOpcUa v2 multi-driver platform (task #205). The original 2025 text described a single-process Galaxy/MXAccess server called LmxOpcUa. Today the project is the **OtOpcUa** multi-driver OPC UA platform deployed as three cooperating processes (Server, Admin, Galaxy.Host). The Galaxy integration is one of seven shipped drivers. HLR-001 through HLR-008 have been rewritten driver-agnostically; HLR-009 has been retired (the embedded Status Dashboard is superseded by the Admin UI). HLR-010 through HLR-017 are new and cover plug-in drivers, resilience, Config DB / draft-publish, cluster redundancy, fleet-wide identifier uniqueness, Admin UI, audit logging, metrics, and the Roslyn capability-wrapping analyzer.
|
||||
> **Revision** — Refreshed 2026-05-23 for the OtOpcUa v2 multi-driver platform. The original 2025 text described a single-process Galaxy/MXAccess server called LmxOpcUa. Today the project is the **OtOpcUa** multi-driver OPC UA platform deployed as two cooperating processes (Server, Admin). The Galaxy integration is one of seven shipped drivers and is now an in-process Tier-A driver that talks gRPC to a separately installed `mxaccessgw` gateway (sibling repo) — PR 7.2 (2026-04-30) retired the legacy out-of-process `Galaxy.Host` Windows service. HLR-001 through HLR-008 have been rewritten driver-agnostically; HLR-009 has been retired (the embedded Status Dashboard is superseded by the Admin UI). HLR-010 through HLR-017 cover plug-in drivers, resilience, Config DB / draft-publish, cluster redundancy, fleet-wide identifier uniqueness, Admin UI, audit logging, metrics, and the Roslyn capability-wrapping analyzer.
|
||||
|
||||
## HLR-001: OPC UA Server
|
||||
|
||||
@@ -28,11 +28,10 @@ Drivers whose backend has a native change signal (e.g. Galaxy's `time_of_last_de
|
||||
|
||||
## HLR-007: Service Hosting
|
||||
|
||||
The system shall be deployed as three cooperating Windows services:
|
||||
The system shall be deployed as two cooperating Windows services (the legacy `OtOpcUa.Galaxy.Host` x86 host was retired in PR 7.2 — Galaxy access now flows through the separately installed `mxaccessgw` gateway, which lives in a sibling repository and is not part of the OtOpcUa deployment):
|
||||
|
||||
- **OtOpcUa.Server** — .NET 10 x64, `Microsoft.Extensions.Hosting` + `AddWindowsService`, hosts all non-Galaxy drivers in-process and the OPC UA endpoint.
|
||||
- **OtOpcUa.Server** — .NET 10 AnyCPU, `Microsoft.Extensions.Hosting` + `AddWindowsService` (decision #30 replaced the original TopShelf choice), hosts every driver in-process — including the new Tier-A `GalaxyDriver` that speaks gRPC to `mxaccessgw` — and the OPC UA endpoint.
|
||||
- **OtOpcUa.Admin** — .NET 10 x64 Blazor Server web app, hosts the admin UI, SignalR hubs for live updates, `/metrics` Prometheus endpoint, and audit log writers.
|
||||
- **OtOpcUa.Galaxy.Host** — .NET Framework 4.8 x86 (TopShelf), hosts MXAccess COM + Galaxy Repository SQL + Historian plugin. Talks to `Driver.Galaxy.Proxy` inside `OtOpcUa.Server` via a named pipe (MessagePack over length-prefixed frames, per-process shared secret, SID-restricted ACL).
|
||||
|
||||
## HLR-008: Logging
|
||||
|
||||
|
||||
+22
-19
@@ -1,5 +1,19 @@
|
||||
# Security
|
||||
|
||||
> **v2 status (2026-05-26).** The four security concerns below are unchanged in v2.
|
||||
> Paths + project names moved: `OtOpcUa.Server/Security/` → `OtOpcUa.Security/`
|
||||
> (`Ldap/`, `Jwt/`, `Endpoints/AuthEndpoints.cs`), `OtOpcUa.Admin` is gone (its
|
||||
> auth + role-grant pages live in `OtOpcUa.AdminUI`), and Admin auth policies
|
||||
> register in `OtOpcUa.Host/Program.cs` via `AddOtOpcUaAuth` rather than in a
|
||||
> separate Admin process. The v2 `Security:Jwt` section adds JWT bearer auth
|
||||
> alongside the existing cookie scheme (`AddJwtBearer` wired via
|
||||
> `IPostConfigureOptions<JwtBearerOptions>` in `OtOpcUa.Security`). DataProtection
|
||||
> keys persist to the shared `ConfigDb.DataProtectionKeys` table so cookies
|
||||
> survive failover between admin-role nodes.
|
||||
>
|
||||
> See `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §5 for the v2
|
||||
> auth + DataProtection rationale.
|
||||
|
||||
OtOpcUa has four independent security concerns. This document covers all four:
|
||||
|
||||
1. **Transport security** — OPC UA secure channel (signing, encryption, X.509 trust).
|
||||
@@ -95,7 +109,7 @@ The Server accepts three OPC UA identity-token types:
|
||||
| Token | Handler | Notes |
|
||||
|---|---|---|
|
||||
| Anonymous | `IUserAuthenticator.AuthenticateAsync(username: "", password: "")` | Refused in strict mode unless explicit anonymous grants exist; allowed in lax mode for backward compatibility. |
|
||||
| UserName/Password | `LdapUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/LdapUserAuthenticator.cs`) | LDAP bind + group lookup; resolved `LdapGroups` flow into the session's identity bearer (`ILdapGroupsBearer`). |
|
||||
| UserName/Password | `LdapOpcUaUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/LdapOpcUaUserAuthenticator.cs`, backed by `LdapAuthService` at `src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/LdapAuthService.cs`) | LDAP bind + group lookup; resolved `LdapGroups` flow into the session's identity bearer (`ILdapGroupsBearer`). |
|
||||
| X.509 Certificate | Stack-level acceptance + role mapping via CN | X.509 identity carries `AuthenticatedUser` + read roles; finer-grain authorization happens through the data-plane ACLs. |
|
||||
|
||||
### LDAP bind flow (`LdapUserAuthenticator`)
|
||||
@@ -207,20 +221,16 @@ The three Write tiers map to Galaxy's v1 `SecurityClassification` — `FreeAcces
|
||||
|
||||
`NodeScope` carries `(ClusterId, NamespaceId, AreaId, LineId, EquipmentId, TagId)`; any suffix may be null — a tag-level ACL is more specific than an area-level ACL but both contribute via union.
|
||||
|
||||
### Dispatch gate — `AuthorizationGate`
|
||||
### Dispatch gate — `IPermissionEvaluator`
|
||||
|
||||
`src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` bridges the OPC UA stack's `ISystemContext.UserIdentity` to the evaluator. `DriverNodeManager` holds exactly one reference to it and calls `IsAllowed(identity, OpcUaOperation.*, NodeScope)` on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call path. A false return short-circuits the dispatch with `BadUserAccessDenied`.
|
||||
`IPermissionEvaluator.Authorize(session, operation, scope)` (default impl `TriePermissionEvaluator` at `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs`) bridges the OPC UA stack's `ISystemContext.UserIdentity` to the trie. The dispatch path calls it on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call. A non-allow decision short-circuits the dispatch with `BadUserAccessDenied`.
|
||||
|
||||
Key properties:
|
||||
|
||||
- **Driver-agnostic.** No driver-level code participates in authorization decisions. Drivers report `SecurityClassification` as metadata on tag discovery; everything else flows through `AuthorizationGate`.
|
||||
- **Driver-agnostic.** No driver-level code participates in authorization decisions. Drivers report `SecurityClassification` as metadata on tag discovery; everything else flows through the evaluator.
|
||||
- **Fail-open-during-transition.** `StrictMode = false` (default during ACL rollouts) lets sessions without resolved LDAP groups proceed; flip `Authorization:StrictMode = true` in production once ACLs are populated.
|
||||
- **Evaluator stays pure.** `TriePermissionEvaluator` has no OPC UA stack dependency — it's tested directly from xUnit.
|
||||
|
||||
### Probe-this-permission (Admin UI)
|
||||
|
||||
`PermissionProbeService` (`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/PermissionProbeService.cs`) lets an operator ask "if a user with groups X, Y, Z asked to do operation O on node N, would it succeed?" The answer is rendered in the AclsTab "Probe" dialog — same evaluator, same trie, so the Admin UI answer and the live Server answer cannot disagree.
|
||||
|
||||
### Full model
|
||||
|
||||
See [`docs/v2/acl-design.md`](v2/acl-design.md) for the complete design: trie invalidation, flag semantics, per-path override rules, and the reasoning behind additive-only (no Deny).
|
||||
@@ -235,23 +245,16 @@ Per decision #150 control-plane roles are **deliberately independent of data-pla
|
||||
|
||||
### Roles
|
||||
|
||||
`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/AdminRoles.cs`:
|
||||
The `AdminRole` enum (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/AdminRole.cs`) defines:
|
||||
|
||||
| Role | Capabilities |
|
||||
|---|---|
|
||||
| `ConfigViewer` | Read-only access to drafts, generations, audit log, fleet status. |
|
||||
| `ConfigEditor` | ConfigViewer plus draft editing (UNS, equipment, tags, ACLs, driver instances, reservations, CSV imports). Cannot publish. |
|
||||
| `FleetAdmin` | ConfigEditor plus publish, cluster/node CRUD, credential management, role-grant management. |
|
||||
| `FleetAdmin` | ConfigEditor plus publish, cluster/node CRUD, credential management, role-grant management. Also satisfies the `DriverOperator` authorization policy. |
|
||||
| `DriverOperator` | May issue **Reconnect** and **Restart** commands against live driver instances from the Admin UI `DriverStatusPanel`. Gated by the `DriverOperator` named policy in `AddAuthorization` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`). Map an LDAP group via `GroupToRole`, e.g. `"ot-driver-operator": "DriverOperator"`. |
|
||||
|
||||
Policies registered in Admin `Program.cs`:
|
||||
|
||||
```csharp
|
||||
builder.Services.AddAuthorizationBuilder()
|
||||
.AddPolicy("CanEdit", p => p.RequireRole(AdminRoles.ConfigEditor, AdminRoles.FleetAdmin))
|
||||
.AddPolicy("CanPublish", p => p.RequireRole(AdminRoles.FleetAdmin));
|
||||
```
|
||||
|
||||
Razor pages and API endpoints gate with `[Authorize(Policy = "CanEdit")]` / `"CanPublish"`; nav-menu sections hide via `<AuthorizeView>`.
|
||||
In v2 the authentication + authorization stack is wired centrally by `AddOtOpcUaAuth` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`) and Razor pages gate inline with the role names, e.g. `@attribute [Authorize(Roles = "FleetAdmin,ConfigEditor")]` on `Deployments.razor`. Nav-menu sections hide via `<AuthorizeView>`.
|
||||
|
||||
### Role grant source
|
||||
|
||||
|
||||
@@ -0,0 +1,265 @@
|
||||
# Admin UI rebuild plan (F15)
|
||||
|
||||
**Status:** UX kickoff — proposals to react to before any per-page rebuild starts.
|
||||
**Last updated:** 2026-05-26 on `v2-akka-fuse`.
|
||||
|
||||
## Why this isn't a straight port
|
||||
|
||||
The v1 Admin UI was built around `ConfigGeneration` draft → publish:
|
||||
operators edited a **draft** generation, the system computed a **diff** against the
|
||||
last published one, and a manual **Publish** sealed it. Six full pages
|
||||
(`DraftEditor`, `DiffViewer`, `DiffSection`, `Generations`, plus the per-tab
|
||||
"viewing draft N" header) lived to make this workflow legible.
|
||||
|
||||
v2 replaces that with **live-edit + snapshot-deploy** (decisions #14a–#14e on this
|
||||
branch). Edits write directly to live tables guarded by `RowVersion`
|
||||
concurrency; deploying is a single click that snapshots the current live state
|
||||
and dispatches it via Akka. Drift between "current live" and "last sealed
|
||||
deployment" surfaces as a one-line indicator on the
|
||||
[Deployments](../../src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Deployments.razor)
|
||||
page.
|
||||
|
||||
That collapses **six pages → zero** before we ship a line of new Razor. The
|
||||
remaining ~41 legacy pages map to ~30 v2 pages once redundant fleet-wide views
|
||||
fold into their cluster-tab equivalents.
|
||||
|
||||
## Inventory: 47 legacy pages → v2 disposition
|
||||
|
||||
Source: `git show 76310b8^ -- 'src/Server/ZB.MOM.WW.OtOpcUa.Admin/**/*.razor'`.
|
||||
|
||||
### Site shell (5 files) — port
|
||||
|
||||
| Legacy | v2 status | Notes |
|
||||
|---|---|---|
|
||||
| `App.razor`, `Routes.razor`, `_Imports.razor` | Port | Boilerplate; minor render-mode tweaks |
|
||||
| `Layout/MainLayout.razor` | ✅ Already in v2 | Done in Task 48 |
|
||||
| `Components/Pages/Login.razor`, `Account.razor` | Port | Auth endpoints changed (cookie+JWT hybrid, Task 26); login form posts to `/auth/login` now |
|
||||
|
||||
### Shared widgets (5 files) — port
|
||||
|
||||
| Legacy | v2 status |
|
||||
|---|---|
|
||||
| `StatusBadge.razor` | ✅ Already in v2 |
|
||||
| `LoadingSpinner.razor` | ✅ Already in v2 |
|
||||
| `ToastNotification.razor` | ✅ Already in v2 |
|
||||
| `ClusterAuthorizeView.razor`, `RedirectToLogin.razor` | Port — adjust for v2 `IUserAuthenticator` |
|
||||
|
||||
### Fleet (1 file) — reshape
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Fleet.razor` | **Reshape.** Drop the v1 "poller reads central DB" data source. v2 reads `NodeDeploymentState` (Applied / Failed / Stale per node) + subscribes to `FleetStatusHub` for live `ServiceLevel` updates (already wired in F16) + queries `IFleetDiagnosticsClient.GetDiagnostics` (F17) for per-node driver health. Single page, similar shape to v1. |
|
||||
|
||||
### Cluster CRUD (3 files) — port
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `ClustersList.razor` | Port |
|
||||
| `NewCluster.razor` | Port |
|
||||
| `ClusterDetail.razor` | **Port — drop draft/publish chrome.** No "New draft" button; no "current published" sidebar. Replace with "Last deployed" badge + a "Deploy" button (already a SignalR-aware widget on the Deployments page; this becomes a cluster-scoped variant). |
|
||||
|
||||
### Draft/publish workflow (4 files) — **drop entirely**
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `DraftEditor.razor` | **Drop.** No drafts in v2. |
|
||||
| `DiffViewer.razor` | **Drop.** Drift indicator replaces it on Deployments page. |
|
||||
| `DiffSection.razor` | **Drop.** |
|
||||
| `Generations.razor` | **Drop — replaced by `Deployments.razor`** (already shipped in v2 ahead of F15). |
|
||||
|
||||
### Cluster tabs (11 files) — port as live-edit forms
|
||||
|
||||
Each becomes a live-edit surface: load the entity, bind to a form, save with
|
||||
`RowVersion` concurrency check (409 on conflict → toast + reload). No "viewing
|
||||
draft N" header; no per-tab snapshot view.
|
||||
|
||||
| Legacy tab | v2 strategy |
|
||||
|---|---|
|
||||
| `EquipmentTab.razor` | Port — UNS path tree picker stays |
|
||||
| `UnsTab.razor` | Port — same |
|
||||
| `NamespacesTab.razor` | Port |
|
||||
| `DriversTab.razor` | Port — **driver-type-specific editors are a separate question (see below)** |
|
||||
| `TagsTab.razor` | Port |
|
||||
| `AclsTab.razor` | Port — wire to v2 LDAP group → role mapping (see RoleGrants question) |
|
||||
| `RedundancyTab.razor` | Port — surface v2 `ServiceLevel` calc (Task 35) instead of v1 redundancy state machine |
|
||||
| `ScriptedAlarmsTab.razor` | Port |
|
||||
| `ScriptsTab.razor` | Port |
|
||||
| `VirtualTagsTab.razor` | Port |
|
||||
| `AuditTab.razor` | Port — wire to v2 `ConfigAuditLog` (post-F3 schema: `EventId`, `CorrelationId` columns) |
|
||||
|
||||
### Cluster-scoped editors (3 files) — port as reusable inputs
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `IdentificationFields.razor` | Port |
|
||||
| `ImportEquipment.razor` | Port |
|
||||
| `ScriptEditor.razor` | Port |
|
||||
|
||||
### Cross-cluster pages (8 files) — mixed
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Hosts.razor` | Port — reshape to "Akka cluster members" (showing `host:port` NodeIds, roles, redundancy state) |
|
||||
| `Certificates.razor` | Port — F13a's `PkiStoreRoot` becomes the data source |
|
||||
| `Reservations.razor` | Port |
|
||||
| `RoleGrants.razor` | **Reshape.** v1 was cluster-scoped role grants; v2 uses LDAP group → role mapping (see Q4 below) |
|
||||
| `AlarmsHistorian.razor` | Port — wire to F11's `HistorianAdapterActor.GetStatus` (queue depth + drain state) |
|
||||
| `ScriptLog.razor` | Port — needs SignalR hub bridge (F16 deferred ScriptLogHub) |
|
||||
| `ScriptedAlarms.razor` (top-level) | **Possibly drop** (see Q2 below) |
|
||||
| `VirtualTags.razor` (top-level) | **Possibly drop** (see Q2 below) |
|
||||
|
||||
### Driver-typed editors (5 files) — sequencing decision needed
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Drivers/FocasDetail.razor` | Defer — JSON editor in `DriversTab` covers the same config initially |
|
||||
| `Modbus/ModbusOptionsEditor.razor` | Same |
|
||||
| `Modbus/ModbusAddressEditor.razor` | Same |
|
||||
| `Modbus/ModbusAddressPreview.razor` | Same |
|
||||
| `Modbus/ModbusDiagnostics.razor` | Port — separate from the config editor, this is operational telemetry |
|
||||
|
||||
### Account (1 file) — port
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Account.razor` | Port — minor reshape for JWT (token expiry UI, refresh button) |
|
||||
|
||||
## Summary by disposition
|
||||
|
||||
| Disposition | Count |
|
||||
|---|---|
|
||||
| Already in v2 | 5 |
|
||||
| Port as-is | 22 |
|
||||
| Port + reshape | 7 |
|
||||
| **Drop (replaced by live-edit / Deployments page)** | **5** |
|
||||
| Drop (redundant with cluster tab) | 2 (pending Q2) |
|
||||
| Defer (driver-typed editors) | 4 |
|
||||
| **Total active rebuild** | ~30 pages |
|
||||
|
||||
## Open design questions
|
||||
|
||||
These need answers before per-page sequencing starts. They drive how many
|
||||
phases the rebuild takes and what gets cut.
|
||||
|
||||
### Q1 — Driver-typed editors: ship now or defer?
|
||||
|
||||
**Context.** v1 had typed editors for Modbus + FOCAS driver config. They sat
|
||||
behind a generic JSON editor for the other six driver types. The typed editors
|
||||
caught operator typos that the JSON editor missed (port ranges, slave-ID
|
||||
collisions, address-map overlaps).
|
||||
|
||||
**Options.**
|
||||
- **Defer all typed editors.** Ship `DriversTab` with a JSON editor first; add
|
||||
typed editors per-driver as field requests come in. Saves ~1 day on F15.
|
||||
- **Port the existing two.** Modbus + FOCAS were already validated against
|
||||
field use. The other six driver types stay JSON-only.
|
||||
- **Ship all eight typed editors.** Most work, best UX. ~3 extra days on F15.
|
||||
|
||||
**Recommendation:** Defer. The OPC UA dual-endpoint tests + driver
|
||||
engine wiring (F7-F10) are higher-leverage and need attention first.
|
||||
|
||||
### Q2 — Top-level `ScriptedAlarms.razor` and `VirtualTags.razor`: keep or drop?
|
||||
|
||||
**Context.** In v1, these were fleet-wide views of every scripted alarm and
|
||||
virtual tag across every cluster. The cluster tabs let you edit them; the
|
||||
top-level pages let you find them across clusters.
|
||||
|
||||
**Options.**
|
||||
- **Drop.** Fleet-wide view is rare; cluster scope covers 95% of use.
|
||||
- **Keep as read-only.** Cross-cluster search + drill-down to the per-cluster tab.
|
||||
|
||||
**Recommendation:** Drop, but expose a global search on the top nav that
|
||||
matches cluster + alarm/tag names if operators ask.
|
||||
|
||||
### Q3 — ClusterDetail: 10 tabs or split routes?
|
||||
|
||||
**Context.** v1 had 10 nav-tabs inside `ClusterDetail.razor`. Some are very
|
||||
heavy (Tags can be 10k rows; AuditTab streams). All 10 share render state.
|
||||
|
||||
**Options.**
|
||||
- **Keep tabs.** Familiar; one URL per cluster.
|
||||
- **Split into routes.** `/clusters/{id}/equipment`, `/clusters/{id}/tags`,
|
||||
etc. Better deep-linking, better load (one tab's data per page), easier auth
|
||||
scoping.
|
||||
|
||||
**Recommendation:** Split into routes. The v1 monolith was already groaning
|
||||
under the live-update SignalR fan-in; routes let each surface manage its own
|
||||
subscription lifecycle.
|
||||
|
||||
### Q4 — RoleGrants: cluster-scoped table or LDAP group → role map?
|
||||
|
||||
**Context.** v1 had a per-cluster `RoleGrants` table where you mapped users to
|
||||
cluster-scoped roles (ClusterAdmin, ClusterOperator, etc.). v2 introduced
|
||||
LDAP-driven auth: LDAP group membership maps to OPC UA permissions
|
||||
(`ReadOnly`, `WriteOperate`, `WriteTune`, `WriteConfigure`, `AlarmAck`)
|
||||
fleet-wide.
|
||||
|
||||
**Options.**
|
||||
- **Keep v1 model.** Cluster-scoped grants survive; LDAP just provides the
|
||||
username.
|
||||
- **Replace with fleet-wide LDAP-group → role mapping.** v2's `LdapOptions`
|
||||
already has a `GroupToRole` dictionary; surface that in a single fleet-level
|
||||
page.
|
||||
- **Both.** LDAP map for fleet-wide defaults; per-cluster overrides for
|
||||
scoping.
|
||||
|
||||
**Recommendation:** Fleet-wide LDAP-group → role map only. Per-cluster scoping
|
||||
adds combinatorial complexity that v2's redundancy model doesn't need
|
||||
(every driver-role node runs every driver in the fleet).
|
||||
|
||||
### Q5 — Login UI: backed by `/auth/login` (cookie+JWT hybrid) — what about LDAP error UX?
|
||||
|
||||
**Context.** v2's `/auth/login` does an LDAP bind. Failures come back as
|
||||
specific reasons (invalid creds vs. service-account misconfig vs. server
|
||||
unreachable). The default behavior is to lump them all into "Login failed."
|
||||
|
||||
**Options.**
|
||||
- **Generic "Login failed."** Safer; doesn't leak whether the username exists.
|
||||
- **Specific error categories.** Helps operators diagnose deploy issues.
|
||||
|
||||
**Recommendation:** Generic for production deployments, specific when
|
||||
`Authentication:Ldap:AllowInsecureLdap=true` (dev mode signal).
|
||||
|
||||
## Proposed sequencing (4 phases)
|
||||
|
||||
Each phase is independently mergeable. The branch ships when Phase A is in;
|
||||
Phases B–D can follow as smaller PRs.
|
||||
|
||||
### Phase A — Shell + auth + fleet (minimum-viable Admin)
|
||||
~½–1 day. Ships a working admin surface with no config editing.
|
||||
- Port `App.razor`, `Routes.razor`, `_Imports.razor`
|
||||
- Port `Login.razor` (post Q5)
|
||||
- Port `Account.razor`
|
||||
- Reshape `Fleet.razor` against v2 data sources
|
||||
- Port `Hosts.razor` reshape
|
||||
|
||||
### Phase B — Cluster CRUD + Overview/Redundancy tabs
|
||||
~1 day. Adds cluster browse + readonly redundancy view.
|
||||
- Port `ClustersList`, `NewCluster`, `ClusterDetail` (Overview tab only)
|
||||
- Port `RedundancyTab` (read-only — surfaces v2 `ServiceLevel`)
|
||||
- Split into routes if Q3 = split
|
||||
|
||||
### Phase C — Config editor tabs
|
||||
~2 days. The big chunk — the live-edit config surface.
|
||||
- `EquipmentTab`, `UnsTab`, `NamespacesTab`
|
||||
- `DriversTab` (JSON-only initially per Q1)
|
||||
- `TagsTab`
|
||||
- `AclsTab` post Q4 reshape
|
||||
- `ImportEquipment`, `IdentificationFields`
|
||||
|
||||
### Phase D — Logic + ops pages
|
||||
~1 day.
|
||||
- `VirtualTagsTab`, `ScriptedAlarmsTab`, `ScriptsTab`, `ScriptEditor`
|
||||
- `AuditTab` against new ConfigAuditLog schema
|
||||
- `RoleGrants` post Q4 reshape
|
||||
- `Certificates`
|
||||
- `Reservations`
|
||||
- `AlarmsHistorian`, `ScriptLog` (depends on F16 ScriptLogHub deferred)
|
||||
|
||||
## Out of scope for F15
|
||||
|
||||
- Typed driver editors (Q1, deferred unless reversed)
|
||||
- Top-level fleet-wide ScriptedAlarms / VirtualTags pages (Q2, recommended drop)
|
||||
- Per-cluster RoleGrants (Q4, recommended drop)
|
||||
- ScriptLogHub SignalR bridge (F16 deferred — only needed for Phase D's
|
||||
ScriptLog page; can move to a separate F16-extension follow-up)
|
||||
@@ -0,0 +1,128 @@
|
||||
# OtOpcUa v2 Architecture
|
||||
|
||||
Single-page tour of the v2 layout. For decision history + tradeoffs, see [`2026-05-26-akka-hosting-alignment-design.md`](../plans/2026-05-26-akka-hosting-alignment-design.md).
|
||||
|
||||
## Big picture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ OtOpcUa.Host │ (fused binary)
|
||||
│ │
|
||||
│ reads OTOPCUA_ROLES env, mounts: │
|
||||
│ ┌─────────────────────────────────────┐ │
|
||||
│ │ admin → Blazor + auth + control- │ │
|
||||
│ │ plane singletons │ │
|
||||
│ │ driver → OPC UA endpoint + │ │
|
||||
│ │ per-node actors │ │
|
||||
│ └─────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────┘
|
||||
│
|
||||
│ joins
|
||||
▼
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Akka.NET cluster │
|
||||
│ (split-brain resolver: keep-oldest, 15s) │
|
||||
└─────────────────────────────────────────────┘
|
||||
|
||||
shared by every node: ┌─────────────────┐
|
||||
│ ConfigDb (SQL) │ live-edit + Deployment artifacts + audit
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
The v1 setup was two separate Windows services (`OtOpcUa.Server` + `OtOpcUa.Admin`) talking through the DB. v2 collapses them into one binary with role gating, and adds an Akka cluster so admin singletons can drive deploys and the redundancy story is automatic.
|
||||
|
||||
## Project layout
|
||||
|
||||
```
|
||||
src/Core/ shared abstractions, no Server deps
|
||||
ZB.MOM.WW.OtOpcUa.Commons types + Akka message contracts + interfaces
|
||||
ZB.MOM.WW.OtOpcUa.Cluster HOCON, AkkaClusterOptions, IClusterRoleInfo
|
||||
ZB.MOM.WW.OtOpcUa.Configuration EF Core DbContext + entities
|
||||
|
||||
src/Server/ server-side projects
|
||||
ZB.MOM.WW.OtOpcUa.Security cookie+JWT auth, LDAP, JwtTokenService
|
||||
ZB.MOM.WW.OtOpcUa.ControlPlane admin-role cluster singletons
|
||||
ZB.MOM.WW.OtOpcUa.Runtime driver-role per-node actors
|
||||
ZB.MOM.WW.OtOpcUa.OpcUaServer OPC UA endpoint facade + Phase7Composer
|
||||
ZB.MOM.WW.OtOpcUa.AdminUI Blazor Razor class library
|
||||
ZB.MOM.WW.OtOpcUa.Host fused binary (Program.cs)
|
||||
```
|
||||
|
||||
| Project | Role | Doc |
|
||||
|---|---|---|
|
||||
| Cluster | Bootstrap + cluster topology view | [Cluster.md](Cluster.md) |
|
||||
| ControlPlane | Admin singletons (deploy, audit, fleet, redundancy) | [ControlPlane.md](ControlPlane.md) |
|
||||
| Runtime | Driver-role actor tree | [Runtime.md](Runtime.md) |
|
||||
| Security | Cookie+JWT auth, LDAP, /auth/* endpoints | [../security.md](../security.md) |
|
||||
| OpcUaServer | OPC UA endpoint host + composer | [../OpcUaServer.md](../OpcUaServer.md) |
|
||||
| Host | Role-gated DI graph + Program.cs | [../ServiceHosting.md](../ServiceHosting.md) |
|
||||
|
||||
## Role gating
|
||||
|
||||
`Program.cs` reads `OTOPCUA_ROLES` once (per process) and decides what to wire:
|
||||
|
||||
```csharp
|
||||
var roles = RoleParser.Parse(Environment.GetEnvironmentVariable("OTOPCUA_ROLES"));
|
||||
var hasAdmin = roles.Contains("admin");
|
||||
var hasDriver = roles.Contains("driver");
|
||||
|
||||
builder.Services.AddOtOpcUaConfigDb(builder.Configuration);
|
||||
builder.Services.AddOtOpcUaCluster(builder.Configuration);
|
||||
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp); // HOCON + remote + cluster options
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
|
||||
if (hasAdmin)
|
||||
{
|
||||
builder.Services.AddOtOpcUaAuth(builder.Configuration);
|
||||
builder.Services.AddAdminUI();
|
||||
// SignalR, AdminOpsClient, etc.
|
||||
}
|
||||
|
||||
builder.Services.AddOtOpcUaHealth();
|
||||
```
|
||||
|
||||
There is a **single** ActorSystem. Cluster singletons + per-node actors share it via the `Akka.Hosting` registry. This was a v2 fix (the initial Phase 9 wiring ran two ActorSystems by mistake; see commit `d6fac2d`).
|
||||
|
||||
## Live-edit vs draft/publish
|
||||
|
||||
v1 had `ConfigGeneration(Draft|Published)` with every live-edit entity FK'd to a generation. Edits accumulated in a Draft until Publish promoted them.
|
||||
|
||||
v2 removes that entirely:
|
||||
|
||||
- No `ConfigGeneration` table, no `GenerationId` columns.
|
||||
- Every live-edit entity has a `RowVersion` (`IsRowVersion()`) for last-write-wins.
|
||||
- Audit goes to `ConfigEdit` (per-row delta) and `ConfigAuditLog` (event-level).
|
||||
- Deploys snapshot the *current* DB state into an immutable `Deployment.ArtifactBlob` + its `RevisionHash`. That artifact is what driver nodes apply.
|
||||
|
||||
See [ControlPlane.md § Deploy flow](ControlPlane.md#deploy-flow) for the end-to-end dispatch + ACK + seal sequence.
|
||||
|
||||
## NodeId
|
||||
|
||||
Each cluster member has a `NodeId` derived as `{PublicHostname}:{Port}` of the Akka remote endpoint. `ClusterRoleInfo.LocalNode` + `ConfigPublishCoordinator.DiscoverDriverNodes()` use the same formula so they always agree. The port suffix makes loopback test deployments distinguishable (commit `5cfbe8b`); in production the hostname alone is already unique.
|
||||
|
||||
## Health endpoints
|
||||
|
||||
| Path | Returns 200 when… |
|
||||
|---|---|
|
||||
| `/healthz` | Process is alive (no checks). |
|
||||
| `/health/ready` | DB reachable + this node is `Up` in the cluster. |
|
||||
| `/health/active` | This node is the admin role-leader (used by Traefik/HA-LB to pin traffic). |
|
||||
|
||||
## What lives where (quick map)
|
||||
|
||||
| Concern | Project | Entry point |
|
||||
|---|---|---|
|
||||
| Read OTOPCUA_ROLES | `Cluster.RoleParser` | static `Parse(string?)` |
|
||||
| Cluster lifecycle | `Cluster.WithOtOpcUaClusterBootstrap` | extension on `AkkaConfigurationBuilder` |
|
||||
| Local node identity | `Cluster.IClusterRoleInfo.LocalNode` | DI singleton |
|
||||
| Admin singletons | `ControlPlane.WithOtOpcUaControlPlaneSingletons` | extension on `AkkaConfigurationBuilder` |
|
||||
| Driver actors | `Runtime.WithOtOpcUaRuntimeActors` | extension on `AkkaConfigurationBuilder` |
|
||||
| Auth pipeline | `Security.AddOtOpcUaAuth` + `MapOtOpcUaAuth` | extensions on `IServiceCollection` / `IEndpointRouteBuilder` |
|
||||
| OPC UA facade | `OpcUaServer.OpcUaApplicationHost` | runtime host, started by driver-role startup |
|
||||
| Partner-URI advertising | `OpcUaServer.OpcUaApplicationHost.PopulateServerArray` | runs after `_application.Start`, appends `PeerApplicationUris` to the SDK `ServerUris` `StringTable` so `Server.ServerArray` (i=2254) returns self + peers |
|
||||
| Health endpoints | `Host.Health.AddOtOpcUaHealth` + `MapOtOpcUaHealth` | extensions on `IServiceCollection` / `IEndpointRouteBuilder` |
|
||||
@@ -0,0 +1,104 @@
|
||||
# OtOpcUa.Cluster
|
||||
|
||||
Akka.NET cluster bootstrap + topology view. Used by every other server-side project to talk to the live cluster.
|
||||
|
||||
Path: `src/Core/ZB.MOM.WW.OtOpcUa.Cluster/`
|
||||
|
||||
## Public surface
|
||||
|
||||
| Type | Role |
|
||||
|---|---|
|
||||
| `AkkaClusterOptions` | DI-bound options from `appsettings.json::Cluster`. Hostname/Port/PublicHostname/SeedNodes/Roles. |
|
||||
| `IClusterRoleInfo` (interface in Commons) | Live view of cluster membership + role-leader topology. Thread-safe + event-raising. |
|
||||
| `ClusterRoleInfo` | Implementation. Subscribes to `ClusterEvent.IMemberEvent` + `RoleLeaderChanged` + `LeaderChanged`. |
|
||||
| `HoconLoader.LoadBaseConfig()` | Reads the embedded `Resources/akka.conf`. |
|
||||
| `RoleParser.Parse(string?)` | Parses `OTOPCUA_ROLES` env var into a deduped `string[]`. |
|
||||
| `ServiceCollectionExtensions.AddOtOpcUaCluster(configuration)` | Binds options + registers `IClusterRoleInfo` singleton. **Does not** start an ActorSystem. |
|
||||
| `WithOtOpcUaClusterBootstrap(serviceProvider)` | Extension on `AkkaConfigurationBuilder`. Loads embedded HOCON + applies `WithRemoting(...)` + `WithClustering(...)` from options. |
|
||||
|
||||
## Bootstrap flow
|
||||
|
||||
```csharp
|
||||
// Program.cs
|
||||
builder.Services.AddOtOpcUaCluster(builder.Configuration);
|
||||
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp); // HOCON + remote + cluster
|
||||
// …singletons + node actors layered on
|
||||
});
|
||||
```
|
||||
|
||||
Order matters: `AddOtOpcUaCluster` must come before `AddAkka` so the options binding has run by the time the `AddAkka` lambda fires. Inside the lambda, `WithOtOpcUaClusterBootstrap` resolves `IOptions<AkkaClusterOptions>` from `sp` and writes them into the Akka builder.
|
||||
|
||||
The single ActorSystem this produces is what every other v2 piece runs on. There is no second Akka instance — that was a Phase 9 bug (commit `d6fac2d` consolidated).
|
||||
|
||||
## Embedded HOCON
|
||||
|
||||
`src/Core/ZB.MOM.WW.OtOpcUa.Cluster/Resources/akka.conf` contains:
|
||||
|
||||
| Setting | Value | Why |
|
||||
|---|---|---|
|
||||
| `akka.actor.provider` | `cluster` | Required for `Cluster.Get(system)` to work. |
|
||||
| `akka.cluster.split-brain-resolver.active-strategy` | `keep-oldest` | Smaller/younger side downs itself on partition. |
|
||||
| `akka.cluster.split-brain-resolver.stable-after` | `15s` | Time before SBR acts. |
|
||||
| `akka.cluster.failure-detector.threshold` | `10.0` | Higher than default (8.0) for GC-pause tolerance. |
|
||||
| `opcua-synchronized-dispatcher.type` | `PinnedDispatcher` | Dedicated thread for `OpcUaPublishActor` so SDK calls stay marshalled. |
|
||||
|
||||
The Cluster.Tests project verifies these key values stay correct (`HoconLoaderTests`).
|
||||
|
||||
## Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `Hostname`: interface to bind. `0.0.0.0` listens on every interface.
|
||||
- `Port`: TCP port for cluster gossip. Default 4053.
|
||||
- `PublicHostname`: address advertised in cluster gossip. Must be reachable by every other node.
|
||||
- `SeedNodes`: where new nodes go to join. List one (or two) stable nodes. First node bootstraps the cluster from its own address.
|
||||
- `Roles`: free-form tags Akka gossip propagates. v2 uses `admin` + `driver`; per-role wiring in `Program.cs` reads `OTOPCUA_ROLES` env var, not this list — these two should stay in sync.
|
||||
|
||||
Per-role overlay files (`appsettings.admin.json`, `appsettings.driver.json`, `appsettings.admin-driver.json`) layer on top of base `appsettings.json` based on the parsed `OTOPCUA_ROLES` (alphabetical, joined by `-`). See [ServiceHosting.md § Per-role configuration overlays](../ServiceHosting.md#per-role-configuration-overlays).
|
||||
|
||||
## IClusterRoleInfo
|
||||
|
||||
Anywhere in the host that needs the local node's identity or a view of who-else-is-in-the-cluster, inject `IClusterRoleInfo`:
|
||||
|
||||
```csharp
|
||||
public sealed class MyService(IClusterRoleInfo cluster)
|
||||
{
|
||||
public NodeId Self => cluster.LocalNode;
|
||||
public IReadOnlyList<NodeId> Drivers => cluster.MembersWithRole("driver");
|
||||
public NodeId? AdminLeader => cluster.RoleLeader("admin");
|
||||
|
||||
public MyService(IClusterRoleInfo cluster)
|
||||
{
|
||||
cluster.RoleLeaderChanged += (_, e) =>
|
||||
Console.WriteLine($"role={e.Role}: {e.PreviousLeader} → {e.NewLeader}");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`LocalNode` is `{PublicHostname}:{Port}` (the port suffix lets loopback test deployments stay distinct; production hostnames are already unique). `ConfigPublishCoordinator` uses the same `{host}:{port}` formula so the expected-ack set and the driver self-identification agree (commit `5cfbe8b`).
|
||||
|
||||
## Lifecycle
|
||||
|
||||
Akka.Hosting owns the lifecycle: `IHostedService` starts the ActorSystem at host start, runs `CoordinatedShutdown.ClusterLeavingReason` on host stop. The Cluster project does not register its own `IHostedService` (the v1 `AkkaHostedService` was deleted in commit `d6fac2d`).
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests/` covers:
|
||||
|
||||
- `HoconLoaderTests` — embedded resource loads + key settings parse correctly.
|
||||
- `RoleParserTests` — comma-split + dedup + trim semantics.
|
||||
|
||||
Cross-project integration is in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/` (cluster formation, deploy round-trip).
|
||||
@@ -0,0 +1,99 @@
|
||||
# OtOpcUa.ControlPlane
|
||||
|
||||
Five admin-role cluster singletons that drive the v2 deploy, audit, fleet, and redundancy stories. Path: `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/`.
|
||||
|
||||
## Singletons
|
||||
|
||||
| Actor | File | Marker key | Role |
|
||||
|---|---|---|---|
|
||||
| `ConfigPublishCoordinator` | `Coordinators/ConfigPublishCoordinator.cs` | `ConfigPublishCoordinatorKey` | Dispatches `DispatchDeployment`, collects `ApplyAck`s, seals/fails/times-out. |
|
||||
| `AdminOperationsActor` | `AdminOperations/AdminOperationsActor.cs` | `AdminOperationsActorKey` | Receives `StartDeployment` from the UI, snapshots ConfigDb via `ConfigComposer`, persists `Deployment` row + `ConfigEdit` marker, tells the coordinator to dispatch. |
|
||||
| `AuditWriterActor` | `Audit/AuditWriterActor.cs` | `AuditWriterActorKey` | Batched `ConfigAuditLog` writer. Flushes every 500 events or 5 s. In-buffer dedup; cross-restart dedup tracked as F3. |
|
||||
| `FleetStatusBroadcaster` | `Fleet/FleetStatusBroadcaster.cs` | `FleetStatusBroadcasterKey` | Aggregates per-node `FleetNodeStatus` heartbeats; publishes `FleetStatusChanged` on the `fleet-status` DPS topic (SignalR bridge tracked as F16). |
|
||||
| `RedundancyStateActor` | `Redundancy/RedundancyStateActor.cs` | `RedundancyStateActorKey` | Cluster-event subscriber; debounces 250 ms; publishes `RedundancyStateChanged` on the `redundancy-state` DPS topic. |
|
||||
|
||||
All five register via `WithOtOpcUaControlPlaneSingletons()` (extension on `AkkaConfigurationBuilder`). Each uses `ClusterSingletonOptions { Role = "admin" }` so the singleton runs on the admin role-leader and migrates to the next admin node on failover.
|
||||
|
||||
```csharp
|
||||
// Program.cs (admin role only)
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp);
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
```
|
||||
|
||||
Resolve from anywhere via `IRequiredActor<T>` or the `ActorRegistry`:
|
||||
|
||||
```csharp
|
||||
public sealed class AdminOperationsClient(ActorRegistry registry) : IAdminOperationsClient
|
||||
{
|
||||
private readonly IActorRef _proxy = registry.Get<AdminOperationsActorKey>();
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
## Deploy flow
|
||||
|
||||
```
|
||||
UI → IAdminOperationsClient.StartDeploymentAsync(createdBy)
|
||||
│ Ask the AdminOperationsActor singleton proxy
|
||||
▼
|
||||
AdminOperationsActor
|
||||
│ ConfigComposer.SnapshotAndFlattenAsync(db) → ConfigArtifact(blob, revHash)
|
||||
│ insert Deployment(Dispatching) + ConfigEdit marker
|
||||
│ Tell coordinator → DispatchDeployment
|
||||
▼
|
||||
ConfigPublishCoordinator
|
||||
│ DiscoverDriverNodes() → expected ACK set (host:port per member)
|
||||
│ insert NodeDeploymentState(Applying) per driver
|
||||
│ Publish DispatchDeployment on "deployments" topic
|
||||
│ Start apply-deadline timer (2 min default)
|
||||
▼ DistributedPubSub
|
||||
DriverHostActor (on each driver node — subscribed to "deployments")
|
||||
│ PreStart subscribed; current state Steady(rev)
|
||||
│ if currentRev == msg.rev → immediate ApplyAck(Applied) (idempotent)
|
||||
│ else Become(Applying) → write NodeDeploymentStatus → ApplyAck
|
||||
▼ via "deployment-acks" topic
|
||||
ConfigPublishCoordinator (subscribed to "deployment-acks" in PreStart)
|
||||
│ PersistNodeAck + collect
|
||||
│ all-Applied → Sealed
|
||||
│ any-Failed → PartiallyFailed
|
||||
│ deadline → TimedOut
|
||||
```
|
||||
|
||||
The dedicated `deployment-acks` topic + coordinator subscription was added in commit `5cfbe8b`. Before that, ACKs were published back on `deployments` and the coordinator (not subscribed) silently dropped them — deployments hung at `AwaitingApplyAcks` forever in multi-node tests.
|
||||
|
||||
### Failover recovery
|
||||
|
||||
If the admin singleton fails over mid-deploy, the new instance's `PreStart` queries `NodeDeploymentState` for any `Dispatching`/`AwaitingApplyAcks` row, rebuilds `_expectedAcks` + `_acks` from persisted state, and resumes the deadline timer. See `Coordinators/ConfigPublishCoordinator.cs::PreStart`.
|
||||
|
||||
## ConfigComposer
|
||||
|
||||
Pure function `SnapshotAndFlattenAsync(db) → ConfigArtifact(byte[], string)`:
|
||||
|
||||
1. Reads every live-edit table.
|
||||
2. Serialises to a stable byte[] (deterministic ordering).
|
||||
3. Computes SHA-256 over the bytes → 64-hex `RevisionHash`.
|
||||
|
||||
Same DB state → same artifact + same hash. That's what makes the `NoChanges` outcome work (AdminOperations compares the proposed hash to the last sealed deployment's hash).
|
||||
|
||||
## ServiceLevelCalculator
|
||||
|
||||
Pure function exposed at `Redundancy/ServiceLevelCalculator.Compute(NodeHealthInputs)`. Returns the OPC UA `ServiceLevel` byte per the truth table in [Redundancy.md](../Redundancy.md#servicelevel-tiers-part-5-65). No side effects; trivially unit-testable.
|
||||
|
||||
## DPS topics
|
||||
|
||||
| Topic | Publisher | Subscribers |
|
||||
|---|---|---|
|
||||
| `deployments` | ConfigPublishCoordinator | DriverHostActor (per-node) |
|
||||
| `deployment-acks` | DriverHostActor | ConfigPublishCoordinator |
|
||||
| `fleet-status` | FleetStatusBroadcaster | (SignalR bridge — F16) |
|
||||
| `redundancy-state` | RedundancyStateActor | (per-node ServiceLevel calc — F10) |
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/` — 29 tests covering coordinator (happy path, timeout, failover recovery), AdminOps (StartDeployment outcomes), AuditWriter (batching, dedup), FleetStatusBroadcaster (heartbeat staleness), RedundancyStateActor (debounce, snapshot), ConfigComposer (purity), ServiceLevelCalculator (truth table).
|
||||
|
||||
Multi-node tests (cross-ActorSystem) are in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/`.
|
||||
@@ -151,8 +151,11 @@ substantive driver change, and revise this table when the data does.
|
||||
`SubscriptionRegistry`, or a downstream consumer retaining
|
||||
`DataValueSnapshot` references past their useful life.
|
||||
|
||||
## Scripted-alarm engine — known hot-path allocations
|
||||
## Scripted-alarm engine — hot-path allocation reuse
|
||||
|
||||
`ScriptedAlarmEngine.BuildReadCache` allocates a fresh `Dictionary<string, DataValueSnapshot>` and `AlarmPredicateContext` on every predicate evaluation — i.e. once per upstream tag change per referencing alarm. On a busy line where many tags feeding many alarms change frequently, this is a steady stream of short-lived dictionary allocations on the hot path. (Core.ScriptedAlarms-009)
|
||||
`ScriptedAlarmEngine` keeps a per-alarm reusable evaluation scratch in `_scratchByAlarmId` — the read-cache `Dictionary<string, DataValueSnapshot>` and the `AlarmPredicateContext` are allocated once per alarm (on first evaluation) and refilled in place across every subsequent predicate evaluation. The hot path no longer allocates a fresh dictionary + context per upstream tag change. (Core.ScriptedAlarms-009)
|
||||
|
||||
The allocations are deliberate for now: predicate evaluation is already serialised under `_evalGate`, so a single reused scratch dictionary would be safe, but the per-call dictionary keeps the evaluation surface immutable and trivially safe against future refactors. If a future scripted-alarm soak surfaces allocation pressure on this path, the mitigation is a per-alarm scratch buffer cleared between evaluations — note here before changing the engine.
|
||||
Safety: reuse is serialised under `_evalGate`, so two threads can never observe the same scratch in a half-refilled state. The context wraps the read-cache by reference, so refilling the dictionary is what the predicate's `ctx.GetTag(path)` calls observe. `LoadAsync` clears `_scratchByAlarmId` alongside `_alarms` so a config-publish drops the prior generation's scratch (a new generation may carry different `Inputs` / `Logger`). Regression tests in `ScriptedAlarmEngineTests` lock the reuse contract:
|
||||
- `Reevaluation_reuses_the_same_read_cache_dictionary` — asserts dictionary instance identity across two evaluations.
|
||||
- `Reevaluation_reuses_the_same_predicate_context` — same, for the context.
|
||||
- `LoadAsync_drops_the_prior_generations_scratch` — asserts a publish resets the scratch.
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
# OtOpcUa.Runtime
|
||||
|
||||
Driver-role actor tree — one set per node. Path: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/`.
|
||||
|
||||
## Actor tree
|
||||
|
||||
```
|
||||
DriverHostActor (per node)
|
||||
│ state machine: Steady ⇄ Applying ⇄ Stale
|
||||
│
|
||||
├──▶ DriverInstanceActor (per configured DriverInstance row)
|
||||
│ state: Connecting → Connected → Reconnecting (or Stubbed)
|
||||
│
|
||||
├──▶ VirtualTagActor (per VirtualTag row)
|
||||
│ compiles + evaluates expression, publishes derived value
|
||||
│
|
||||
├──▶ ScriptedAlarmActor (per ScriptedAlarm row)
|
||||
│ state: Inactive ⇄ Active ⇄ Acknowledged
|
||||
│
|
||||
├──▶ OpcUaPublishActor (per node, pinned dispatcher)
|
||||
│ marshalled OPC UA SDK writes + RebuildAddressSpace
|
||||
│
|
||||
├──▶ HistorianAdapterActor (per node)
|
||||
│ pipe IPC to Wonderware historian sidecar
|
||||
│
|
||||
├──▶ PeerOpcUaProbeActor (per peer node)
|
||||
│ opc.tcp ping → redundancy-state DPS topic
|
||||
│
|
||||
└──▶ DbHealthProbeActor (per node)
|
||||
cached SELECT 1; consumed by /health/ready + redundancy calc
|
||||
```
|
||||
|
||||
## Public surface
|
||||
|
||||
| Type | File |
|
||||
|---|---|
|
||||
| `WithOtOpcUaRuntimeActors()` | `ServiceCollectionExtensions.cs` — extension on `AkkaConfigurationBuilder`. Spawns `DriverHostActor` + `DbHealthProbeActor` on the host's ActorSystem. |
|
||||
| `DriverHostActor` | `Drivers/DriverHostActor.cs` |
|
||||
| `DriverInstanceActor` | `Drivers/DriverInstanceActor.cs` |
|
||||
| `VirtualTagActor` | `VirtualTags/VirtualTagActor.cs` |
|
||||
| `ScriptedAlarmActor` | `ScriptedAlarms/ScriptedAlarmActor.cs` |
|
||||
| `OpcUaPublishActor` | `OpcUa/OpcUaPublishActor.cs` |
|
||||
| `HistorianAdapterActor` | `Historian/HistorianAdapterActor.cs` |
|
||||
| `PeerOpcUaProbeActor` | `Health/PeerOpcUaProbeActor.cs` |
|
||||
| `DbHealthProbeActor` | `Health/DbHealthProbeActor.cs` |
|
||||
|
||||
Marker keys for registry lookup: `DriverHostActorKey`, `DbHealthProbeActorKey`.
|
||||
|
||||
## DriverHostActor
|
||||
|
||||
Per-node supervisor with three Become states:
|
||||
|
||||
| State | Meaning |
|
||||
|---|---|
|
||||
| `Steady(rev)` | Caught up. `DispatchDeployment` with `msg.rev == currentRev` → immediate `ApplyAck(Applied)` (idempotent). New rev → `Become(Applying)`. |
|
||||
| `Applying(id)` | Apply in progress. Further `DispatchDeployment` for in-flight ID → debug-log + ignore. For new ID → defer via `Self.Forward`. |
|
||||
| `Stale` | ConfigDb unreachable on bootstrap. Periodic `RetryConfigDbConnection` tries to advance to `Steady`. |
|
||||
|
||||
`PreStart`:
|
||||
|
||||
1. Subscribe to `deployments` DPS topic.
|
||||
2. Read most-recent `NodeDeploymentState` for this node from ConfigDb.
|
||||
3. If `Applied` → restore `_currentRevision`, `Become(Steady)`.
|
||||
4. If `Applying` (orphan from crash) → replay apply (idempotent).
|
||||
5. If `Failed` → `Become(Steady)` at last known rev.
|
||||
6. DB unreachable → `Become(Stale)`, start retry timer.
|
||||
|
||||
ACK publishing: when no `_coordinatorOverride` is set (production), `SendAck` publishes on the dedicated `deployment-acks` DPS topic which the coordinator subscribes to (commit `5cfbe8b`).
|
||||
|
||||
## DriverInstanceActor
|
||||
|
||||
Per-driver-instance child. State machine:
|
||||
|
||||
- `Connecting` → first attempt to reach the underlying driver
|
||||
- `Connected` → subscriptions active, reads/writes flow
|
||||
- `Reconnecting` → temporary disconnect; backoff retry
|
||||
- `Stubbed` → DEV-STUB mode for Windows-only drivers (Galaxy, Wonderware Historian) on non-Windows or when `roles` contains `dev`
|
||||
|
||||
`ShouldStub(driverType, roles)` returns `true` for `"Galaxy" | "Historian.Wonderware"` on non-Windows; the actor goes straight to `Stubbed` and returns deterministic success without touching real hardware. Wiring this into the DriverHost child-spawn path is follow-up F20 (folds into F7).
|
||||
|
||||
Engine wiring (subscription publishing, ApplyDelta diff, bad-quality-on-disconnect, write path, supervisor backoff) is stubbed — tracked as F7. Tests exercise message contracts, not engine behaviour.
|
||||
|
||||
## VirtualTagActor / ScriptedAlarmActor
|
||||
|
||||
Skeleton state machines + message handlers. Engine work:
|
||||
|
||||
- `VirtualTagEngine.Evaluate()` not yet called from `VirtualTagActor.DependencyValueChanged` (F8).
|
||||
- `AlarmConditionService` not yet called from `ScriptedAlarmActor` (F9).
|
||||
- `ScriptedAlarmState` DB persistence on `PreRestart` not wired (F9).
|
||||
|
||||
## OpcUaPublishActor
|
||||
|
||||
The only actor on the **pinned dispatcher** (`opcua-synchronized-dispatcher` from `akka.conf`). All OPC UA SDK address-space writes go through it so the SDK's threading model isn't violated.
|
||||
|
||||
Message contracts are defined; actual SDK calls are stubbed (counters only). Real address-space writes + `ServiceLevel` Variable updates + `RebuildAddressSpace` after a deploy land in F10 (gated on F13 — full `OpcUaApplicationHost` extraction).
|
||||
|
||||
## HistorianAdapterActor, PeerOpcUaProbeActor
|
||||
|
||||
Both have message contracts wired. Engine integration deferred:
|
||||
|
||||
- `HistorianAdapterActor` — named-pipe IPC to the Wonderware historian sidecar + `SqliteStoreAndForwardSink` (F11).
|
||||
- `PeerOpcUaProbeActor` — real `opc.tcp://peer:4840` ping (F12). Current stub always returns `Ok=true`.
|
||||
|
||||
## DbHealthProbeActor
|
||||
|
||||
`Ask<DbHealthStatus>` returns cached state (refreshed every 5 s by an internal `SELECT 1`). Consumed by `/health/ready` and `RedundancyStateActor`.
|
||||
|
||||
## Lifecycle wiring
|
||||
|
||||
```csharp
|
||||
// Program.cs (driver role only)
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp);
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
```
|
||||
|
||||
`WithOtOpcUaRuntimeActors` resolves `IDbContextFactory<OtOpcUaConfigDbContext>` + `IClusterRoleInfo` from DI, then spawns `DbHealthProbeActor` and `DriverHostActor` as top-level `/user/` actors. Both register marker keys in `ActorRegistry` so the registry lookup works from anywhere.
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/` — 16 tests covering DriverHostActor (Steady ack, Applying transitions, Stale recovery), DriverInstanceActor (state machine, stub mode), VirtualTagActor + ScriptedAlarmActor (message contracts), OpcUaPublishActor (props + message acceptance), DbHealthProbe + PeerOpcUaProbe (probe loop), and the `WithOtOpcUaRuntimeActors` registration round-trip.
|
||||
|
||||
End-to-end deploy from admin → driver via the cluster is in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/DeployHappyPathTests.cs`.
|
||||
+3
-3
@@ -36,7 +36,7 @@ Mirror ScadaLink's layout exactly:
|
||||
|
||||
```
|
||||
src/
|
||||
ZB.MOM.WW.OtOpcUa.Admin/ # Razor Components project (.NET 10)
|
||||
ZB.MOM.WW.OtOpcUa.AdminUI/ # Razor Components project (.NET 10)
|
||||
Auth/
|
||||
AuthEndpoints.cs # /auth/login, /auth/logout, /auth/token
|
||||
CookieAuthenticationStateProvider.cs # bridges cookie auth to Blazor <AuthorizeView>
|
||||
@@ -61,10 +61,10 @@ src/
|
||||
NotAuthorizedView.razor
|
||||
EndpointExtensions.cs # MapAuthEndpoints + role policies
|
||||
ServiceCollectionExtensions.cs # AddCentralAdmin
|
||||
ZB.MOM.WW.OtOpcUa.Admin.Security/ # LDAP + role mapping + JWT (sibling of ScadaLink.Security)
|
||||
ZB.MOM.WW.OtOpcUa.Security/ # LDAP + role mapping + JWT (sibling of ScadaLink.Security)
|
||||
```
|
||||
|
||||
The `Admin.Security` project carries `LdapAuthService`, `RoleMapper`, `JwtTokenService`, `AuthorizationPolicies`. If it ever makes sense to consolidate with ScadaLink's identical project, lift to a shared internal NuGet — out of scope for v2.0 to keep OtOpcUa decoupled from ScadaLink's release cycle.
|
||||
The `Security` project carries `LdapAuthService`, `RoleMapper`, `JwtTokenService`, `AuthorizationPolicies`. If it ever makes sense to consolidate with ScadaLink's identical project, lift to a shared internal NuGet — out of scope for v2.0 to keep OtOpcUa decoupled from ScadaLink's release cycle.
|
||||
|
||||
## Authentication & Authorization
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ Tie-in capability — **historian alarm sink**:
|
||||
| 3 | Evaluation trigger = **change-driven + timer-driven**; operator chooses per-tag | Change-driven is cheap at steady state; timer is the escape hatch for polling derivations that don't have a discrete "input changed" signal. |
|
||||
| 4 | Script shape = **Shape A — one script per virtual tag/alarm**; `return` produces the value (or `bool` for alarm condition) | Minimal surface; no predicate/action split. Alarm side-effects (severity, message) configured out-of-band, not in the script. |
|
||||
| 5 | Alarm fidelity = **full OPC UA Part 9** | Uniform with Galaxy + ALMD on the wire; client-side tooling (HMIs, historians, event pipelines) gets one shape. |
|
||||
| 6 | Sandbox = **read-only context**; scripts can only read any tag + write to virtual tags | Strict Roslyn `ScriptOptions` allow-list. Authoritative deny-list (`ForbiddenTypeAnalyzer`): namespace-prefix deny `System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Tasks` (Task / Parallel fan-out — Core.Scripting-003), `System.Runtime.InteropServices`, `Microsoft.Win32`; type-granular deny `System.Environment`, `System.AppDomain`, `System.GC`, `System.Activator`, `System.Threading.Thread` (these live directly in the allow-listed `System` / `System.Threading` namespaces, so a prefix rule cannot reach them without blocking primitives — Core.Scripting-001 / -009). |
|
||||
| 6 | Sandbox = **read-only context**; scripts can only read any tag + write to virtual tags | Strict Roslyn `ScriptOptions` allow-list. Authoritative deny-list (`ForbiddenTypeAnalyzer`): namespace-prefix deny `System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Tasks` (Task / Parallel fan-out — Core.Scripting-003), `System.Runtime.InteropServices`, `System.Runtime.Loader` (AssemblyLoadContext et al. — Core.Scripting-012), `Microsoft.Win32`; type-granular deny `System.Environment`, `System.AppDomain`, `System.GC`, `System.Activator`, `System.Threading.Thread`, `System.Threading.ThreadPool` (Core.Scripting-012), `System.Threading.Timer` (Core.Scripting-012) (these live directly in the allow-listed `System` / `System.Threading` namespaces, so a prefix rule cannot reach them without blocking primitives — Core.Scripting-001 / -009 / -012). |
|
||||
| 7 | Dependency declaration = **AST inference**; operator doesn't maintain a separate dependency list | `CSharpSyntaxWalker` extracts `ctx.GetTag("path")` string-literal calls at compile time; dynamic paths rejected at publish. |
|
||||
| 8 | Config storage = **config DB with generation-sealed cache** (same as driver instances) | Virtual tags + alarms publish atomically in the same generation as the driver instance config they may depend on. |
|
||||
| 9 | Script return value shape (`ctx.GetTag`) = **`DataValue { Value, StatusCode, Timestamp }`** | Scripts branch on quality naturally without separate `ctx.GetQuality(...)` calls. |
|
||||
@@ -89,7 +89,7 @@ Tie-in capability — **historian alarm sink**:
|
||||
|
||||
### Stream A — `Core.Scripting` (Roslyn engine + sandbox + AST inference + logger) — **2 weeks**
|
||||
|
||||
1. **A.1** Project scaffold + NuGet `Microsoft.CodeAnalysis.CSharp.Scripting`. `ScriptOptions` allow-list (`typeof(object).Assembly`, `typeof(Enumerable).Assembly`, the Core.Scripting assembly itself — nothing else). Hand-written `ScriptContext` base class with `GetTag(string)` / `SetVirtualTag(string, object)` / `Logger` / `Now` / `Deadband(double, double, double)` helpers.
|
||||
1. **A.1** Project scaffold + NuGet `Microsoft.CodeAnalysis.CSharp.Scripting`. `ScriptOptions` allow-list (`typeof(object).Assembly`, `typeof(Enumerable).Assembly`, the Core.Scripting assembly itself — nothing else). Hand-written `ScriptContext` base class with `GetTag(string)` / `SetVirtualTag(string, object)` / `Logger` / `Now` / `Deadband(double, double, double)` helpers. _(Implementation note 2026-05-23 — superseded by Core.Scripting-008 / -016: the `CSharpScript`/`ScriptRunner` path was replaced with a hand-rolled `CSharpCompilation.Create` → `Emit(MemoryStream)` → collectible `ScriptAssemblyLoadContext.LoadFromStream` pipeline so per-publish ALC accretion is reclaimable, and engines route compiles through `CompiledScriptCache` rather than calling `ScriptEvaluator.Compile` directly. The reference list was correspondingly widened from the narrow allow-list above to the full BCL `TRUSTED_PLATFORM_ASSEMBLIES` set (filtered to `System.*` + `netstandard` + `Microsoft.Win32.Registry`) because the new pipeline can't compile against the old narrow set; `ForbiddenTypeAnalyzer` is now the sole security gate, consistent with how Core.Scripting-001 / -002 established the analyzer must be the real boundary because type forwarding makes any references-list-only restriction porous. See `docs/VirtualTags.md` "Compile cache" for the current implementation contract.)_
|
||||
2. **A.2** `DependencyExtractor : CSharpSyntaxWalker`. Visits every `InvocationExpressionSyntax` targeting `ctx.GetTag` / `ctx.SetVirtualTag`; accepts only a `LiteralExpressionSyntax` argument. Non-literal arguments (concat, variable, method call) → publish-time rejection with an actionable error pointing the operator at the exact span. Outputs `IReadOnlySet<string> Inputs` + `IReadOnlySet<string> Outputs`.
|
||||
3. **A.3** Compile cache. `(source_hash) → compiled Script<T>`. Recompile only when source changes. Warm on `SealedBootstrap`.
|
||||
4. **A.4** Per-evaluation timeout wrapper (default 250ms; configurable per tag). Timeout = tag quality `BadInternalError` + structured warning log. Keeps a single runaway script from starving the engine.
|
||||
@@ -162,7 +162,7 @@ Tie-in capability — **historian alarm sink**:
|
||||
|
||||
## Compliance Checks (run at exit gate)
|
||||
|
||||
- [ ] **Sandbox escape**: attempts to reference any deny-listed namespace prefix (`System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Tasks`, `System.Runtime.InteropServices`, `Microsoft.Win32`) or any of the type-granular forbidden types (`System.Environment`, `System.AppDomain`, `System.GC`, `System.Activator`, `System.Threading.Thread`) fail at script compile with an actionable error. Vectors include direct calls, `typeof(T)`, generic type arguments, casts, `is`/`as` patterns, `default(T)`, array element types, and explicitly-typed local declarations.
|
||||
- [ ] **Sandbox escape**: attempts to reference any deny-listed namespace prefix (`System.IO`, `System.Net`, `System.Diagnostics`, `System.Reflection`, `System.Threading.Tasks`, `System.Runtime.InteropServices`, `System.Runtime.Loader`, `Microsoft.Win32`) or any of the type-granular forbidden types (`System.Environment`, `System.AppDomain`, `System.GC`, `System.Activator`, `System.Threading.Thread`, `System.Threading.ThreadPool`, `System.Threading.Timer`) fail at script compile with an actionable error. Vectors include direct calls, `typeof(T)`, generic type arguments, casts, `is`/`as` patterns, `default(T)`, array element types, and explicitly-typed local declarations.
|
||||
- [ ] **Dependency inference**: `ctx.GetTag(myStringVar)` (non-literal path) is rejected at publish with a span-pointed error; `ctx.GetTag("Line1/Speed")` is accepted + appears in the inferred input set.
|
||||
- [ ] **Change cascade**: tag A → virtual tag B → virtual tag C. When A changes, B recomputes, then C recomputes. Single change event triggers the full cascade in topological order within one evaluation pass.
|
||||
- [ ] **Cycle rejection**: publish a config where virtual tag B depends on A and A depends on B. Publish fails pre-commit with a clear cycle message.
|
||||
|
||||
@@ -96,7 +96,7 @@ Shipped as PR #183 (12 tests in configuration; 13 more in Admin.Tests).
|
||||
| F.4 — Test harness (modal, synthetic inputs, output + logger display) | **Partial** | `ScriptTestHarnessService.cs` is complete and tested. `ScriptsTab.razor` calls `Harness.RunVirtualTagAsync` with zero-value synthetic inputs derived from the extractor. A full interactive input-form modal was not shipped — the harness zeroes all inputs automatically rather than prompting the operator per-tag. |
|
||||
| F.5 — Script log viewer (SignalR tail of `scripts-*.log` filtered by `ScriptName`, load-more) | **Not started** | No SignalR stream of the scripts log is wired in the Admin UI. The `AlertHub` / `FleetStatusHub` exist but there is no `ScriptLogHub`. |
|
||||
| F.6 — `/alarms/historian` diagnostics view | **Done** | `AlarmsHistorian.razor` + `HistorianDiagnosticsService.cs` |
|
||||
| F.7 — Playwright smoke (author calc tag, verify in equipment tree; author alarm, verify in `AlarmsAndConditions`) | **Not started** | `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/` exists but its `UnsTabDragDropE2ETests.cs` is the only Playwright test; no Phase 7 Admin UI playwright scenario. |
|
||||
| F.7 — Playwright smoke (author calc tag, verify in equipment tree; author alarm, verify in `AlarmsAndConditions`) | **Not started** | No Phase 7 Playwright/E2E project exists in the repo today; future-work item without an assigned path. |
|
||||
|
||||
Shipped as PR #185 (13 Admin service tests; UI completeness is partial — see gaps section).
|
||||
|
||||
@@ -190,8 +190,8 @@ The SignalR tail of `scripts-*.log` filtered by `ScriptName` was not implemented
|
||||
| `Core.VirtualTags` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/` |
|
||||
| `Core.ScriptedAlarms` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/` |
|
||||
| `Core.AlarmHistorian` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/` |
|
||||
| Server Phase7 composition | `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/` |
|
||||
| Admin services | `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/Script*.cs`, `VirtualTagService.cs`, `HistorianDiagnosticsService.cs` |
|
||||
| Admin UI pages | `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Clusters/ScriptsTab.razor`, `AlarmsHistorian.razor` |
|
||||
| Server Phase7 composition | `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`, `Phase7Applier.cs`, `Phase7Plan.cs` |
|
||||
| Admin services (CRUD writes) | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs` (actor-driven); live state in `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`, `Runtime/VirtualTags/VirtualTagActor.cs`; Roslyn engines in `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/` — v1 `Admin/Services/Script*.cs`, `VirtualTagService.cs`, `HistorianDiagnosticsService.cs` deleted |
|
||||
| Admin UI pages | `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Scripts.razor`, `ScriptEdit.razor`, `ScriptedAlarms.razor`, `ScriptedAlarmEdit.razor`, `AlarmsHistorian.razor`, `VirtualTags.razor`, `VirtualTagEdit.razor` |
|
||||
| Historian sidecar writer | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs` |
|
||||
| EF migrations | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260420231641_AddPhase7ScriptingTables.cs`, `20260420232000_ExtendComputeGenerationDiffWithPhase7.cs` |
|
||||
|
||||
+2
-2
@@ -193,7 +193,7 @@ ConfigurationService
|
||||
- Compact binary format, faster than JSON, good fit for high-frequency data change callbacks
|
||||
- Simpler than gRPC on .NET 4.8 (which needs legacy `Grpc.Core` native library)
|
||||
|
||||
**Decided: Galaxy Host is a separate Windows service.**
|
||||
**Decided: Galaxy Host is a separate Windows service.** _(Reversed by PR 7.2, 2026-04-30 — see PR 7.2's commit `ae7106d` and the project_galaxy_via_mxgateway memory entry. The legacy in-process `Galaxy.Host` / `Galaxy.Proxy` / `Galaxy.Shared` projects + the `OtOpcUaGalaxyHost` Windows service were retired; Galaxy access now flows through the in-process Tier-A `GalaxyDriver` talking gRPC to a separately installed `mxaccessgw` gateway sibling repo. The reasoning below was correct for the original LMX/x86-COM architecture; the gateway sibling repo now owns those constraints externally.)_
|
||||
- Independent lifecycle from the OtOpcUa Server
|
||||
- Can be restarted without affecting the main server or other drivers
|
||||
- Galaxy.Proxy detects connection loss, sets Bad quality on Galaxy nodes, reconnects when Host comes back
|
||||
@@ -801,7 +801,7 @@ aggregate runner (#253); server-side factory + seed SQL per driver (#210–#213)
|
||||
| 26 | Admin deploys on same server (co-hosted) | Simplifies deployment; can also run on separate management host | 2026-04-16 |
|
||||
| 27 | Admin scaffold early, driver-specific screens deferred | Core CRUD for instances/drivers first; per-driver config UI added with each driver | 2026-04-16 |
|
||||
| 28 | Named pipes for Galaxy IPC | Fast, no port conflicts, native to both .NET 4.8 and .NET 10 | 2026-04-16 |
|
||||
| 29 | Galaxy Host is a separate Windows service | Independent lifecycle, can restart without affecting main server or other drivers | 2026-04-16 |
|
||||
| 29 | Galaxy Host is a separate Windows service | Independent lifecycle, can restart without affecting main server or other drivers | 2026-04-16 (**reversed PR 7.2, 2026-04-30** — Galaxy is now an in-process Tier-A driver talking gRPC to the sibling `mxaccessgw` gateway; see the decision body above) |
|
||||
| 30 | Drop TopShelf, use Microsoft.Extensions.Hosting | Built-in Windows Service support in .NET 10, no third-party dependency | 2026-04-16 |
|
||||
| 31 | Mono-repo for all drivers | Simpler dependency management, single CI pipeline, shared abstractions | 2026-04-16 |
|
||||
| 32 | MessagePack serialization for Galaxy IPC | Binary, fast, works on .NET 4.8+ and .NET 10 via MessagePack-CSharp NuGet | 2026-04-16 |
|
||||
|
||||
@@ -55,6 +55,7 @@ Each row is one manual run; pass criterion in the right column.
|
||||
| A2 | ServiceLevel updates on peer down | Connect to Primary. Stop Backup (`sc stop OtOpcUa`). Watch `ServiceLevel`. | Transitions 200 → 150 within ~2 s of peer probe timeout |
|
||||
| A3 | RedundancySupport | Browse to `Server.ServerRedundancy.RedundancySupport`. | Value matches the declared `RedundancyMode` (Warm / Hot / None) |
|
||||
| A4 | ServerUriArray (non-transparent upgrade) | Requires a redundancy-object-type upgrade follow-up. | When upgrade lands: `ServerUriArray` reports both ApplicationUris, self first |
|
||||
| A4b | Peer URI visibility via `Server.ServerArray` (i=2254) | Configure each `OpcUaApplicationHost` with the partner's `ApplicationUri` via `OpcUaApplicationHostOptions.PeerApplicationUris`. From any client, Read NodeId `i=2254` (`Server.ServerArray`). | Returned `String[]` includes both self + peer `ApplicationUri`s. Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` (loopback dual-host with real OPCFoundation client `Session` read). |
|
||||
| A5 | Mid-apply dip | On Primary trigger a `sp_PublishGeneration` apply. | `ServiceLevel` drops to 75 for the apply duration + dwell |
|
||||
|
||||
### Block B — Client failover
|
||||
@@ -101,7 +102,9 @@ flips A4 from "deferred" to "expected pass").
|
||||
- **A4 pending**: `Server.ServerRedundancy` on our current SDK build lands as
|
||||
the base `ServerRedundancyState`, which has no `ServerUriArray` child.
|
||||
`ServerRedundancyNodeWriter.ApplyServerUriArray` logs-and-skips until the
|
||||
redundancy-object-type upgrade follow-up lands.
|
||||
redundancy-object-type upgrade follow-up lands. Cross-reference **A4b** —
|
||||
peer URIs are visible today via `Server.ServerArray` (i=2254) populated by
|
||||
`OpcUaApplicationHost.PopulateServerArray`.
|
||||
- **Recovery dwell default**: `RecoveryStateManager.DwellTime` defaults to 60 s
|
||||
in `Program.cs`. Adjust via future config knob if B3 takes too long to
|
||||
observe.
|
||||
@@ -121,8 +124,8 @@ flips A4 from "deferred" to "expected pass").
|
||||
redundancy implementations we don't control.
|
||||
- For the sub-set of scenarios that *can* be automated — the self-loopback
|
||||
case where our own `otopcua-cli` drives Primary + Backup — the existing
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/RedundancyStatePublisherTests` +
|
||||
`ServiceLevelCalculatorTests` (unit) + `ClusterTopologyLoaderTests`
|
||||
(integration) already cover the math + data path. The wire-level assertion
|
||||
that the values actually land on the right OPC UA nodes is covered by
|
||||
`ServerRedundancyNodeWriterTests`.
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/RedundancyStateActorTests` +
|
||||
`ServiceLevelCalculatorTests` (unit) already cover the math + data path.
|
||||
The wire-level assertion that the peer URIs actually land on the
|
||||
`Server.ServerArray` node (i=2254) is covered by `DualEndpointTests` in
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/`.
|
||||
|
||||
@@ -57,7 +57,7 @@ Remaining follow-ups (hardening):
|
||||
Remaining Phase 6.3 surfaces (hardening, not release-blocking):
|
||||
|
||||
- ~~`PeerHttpProbeLoop` + `PeerUaProbeLoop` HostedServices populating `PeerReachabilityTracker` on each tick.~~ **Closed 2026-04-24.** Two-layer probe model shipped: HTTP probe at 2 s / 1 s timeout against `/healthz`; OPC UA probe at 10 s / 5 s timeout via `DiscoveryClient.GetEndpoints`, short-circuiting when HTTP reports the peer unhealthy. Registered on the Server as `AddHostedService<PeerHttpProbeLoop>` + `AddHostedService<PeerUaProbeLoop>`. Publisher now sees accurate `PeerReachability` per peer instead of degrading to `Unknown` → Isolated-Primary band (230).
|
||||
- OPC UA variable-node wiring: bind `ServiceLevel` Byte + `ServerUriArray` String[] to the publisher's events via `BaseDataVariable.OnReadValue` / direct value push.
|
||||
- ~~OPC UA variable-node wiring: bind `ServiceLevel` Byte + `ServerUriArray` String[] to the publisher's events via `BaseDataVariable.OnReadValue` / direct value push.~~ **Closed 2026-05-26.** `ServiceLevel` byte binding closed earlier under Path D. Peer-URI half closed via `OpcUaApplicationHost.PopulateServerArray` — populates self + each `PeerApplicationUris` entry into the SDK `IServerInternal.ServerUris` `StringTable`; clients read `Server.ServerArray` (NodeId `i=2254`). Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/`. `ServerUriArray` proper (the redundancy-object-type child) remains deferred pending object-type upgrade.
|
||||
- ~~`sp_PublishGeneration` wraps its apply in `await using var lease = coordinator.BeginApplyLease(...)` so the `PrimaryMidApply` band (200) fires during actual publishes (task #148 part 2).~~ **Closed 2026-04-24.** The apply loop now lives in `GenerationRefreshHostedService` — polls `sp_GetCurrentGenerationForCluster` every 5s, opens a lease when a new generation is detected, calls `RedundancyCoordinator.RefreshAsync` inside the `await using`, releases the lease on all exit paths. Replaces the previous "topology never refreshes without a process restart" behaviour.
|
||||
- Client interop matrix — Ignition / Kepware / Aveva OI Gateway (Stream F, task #150). Manual + doc-only.
|
||||
|
||||
@@ -67,7 +67,7 @@ Remaining Phase 6.3 surfaces (hardening, not release-blocking):
|
||||
|
||||
- **AB CIP** (PRs #202–222) — `Driver.AbCip`, `Driver.AbCip.IntegrationTests` (7 tests), AB CIP Cli. Live-boot verified against a ControlLogix rig.
|
||||
- **AB Legacy** (PRs #202, #223) — `Driver.AbLegacy`, `Driver.AbLegacy.IntegrationTests` (2 tests), AB Legacy Cli. PCCC cip-path workaround for SLC/MicroLogix.
|
||||
- **TwinCAT ADS** (PRs #205, this branch `task-galaxy-e2e`) — `Driver.TwinCAT`, `Driver.TwinCAT.IntegrationTests` (2 tests), TwinCAT Cli. TCBSD/ESXi fixture for e2e since local Hyper-V / TwinCAT RTIME are mutually exclusive on the dev box.
|
||||
- **TwinCAT ADS** (PR #205) — `Driver.TwinCAT`, `Driver.TwinCAT.IntegrationTests` (2 tests), TwinCAT Cli. TCBSD/ESXi fixture for e2e since local Hyper-V / TwinCAT RTIME are mutually exclusive on the dev box.
|
||||
- **FOCAS** (PRs #173, #199 + this session's migration) — `Driver.FOCAS` with an **in-process managed `FocasWireClient`** that speaks FOCAS/2 over TCP directly. Tier-C isolation retired — `Driver.FOCAS.Host` + `Driver.FOCAS.Shared` + `FwlibNative` P/Invoke + shim DLL + NSSM service all deleted. `Driver.FOCAS.IntegrationTests` covers 9 scenarios (fixed tree identity/axes/program/timers/spindle + user-authored PARAM/MACRO/PMC reads, Browse, Subscribe, IAlarmSource raise/clear, Probe transitions).
|
||||
|
||||
Decision recorded: FOCAS is **read-only** against the CNC by design — writes return `BadNotWritable`. See `docs/drivers/FOCAS.md` + `docs/drivers/FOCAS-Test-Fixture.md` for the deployment + coverage map.
|
||||
@@ -118,6 +118,7 @@ v2 GA requires all of the following:
|
||||
|
||||
## Change log
|
||||
|
||||
- **2026-05-26** — Gap-closeout pass. `OpcUaApplicationHost.PopulateServerArray` populates `Server.ServerArray` (NodeId `i=2254`) with self + `OpcUaApplicationHostOptions.PeerApplicationUris`, giving non-transparent peer URI visibility through the standard discovery surface. New `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` IT project (`DualEndpointTests`) validates with two real `OpcUaApplicationHost` instances on loopback + a live OPCFoundation client `Session` read. CI `v2-ci.yml` `integration:` job converted to a matrix across `Host.IntegrationTests` + `OpcUaServer.IntegrationTests`. Per-role appsettings overlays shipped (`appsettings.admin.json` / `appsettings.driver.json` / `appsettings.admin-driver.json`) — `Program.cs:33-35` loads by alphabetical-joined role suffix. `FailoverScenarioTests` → `FailoverDuringDeployTests` rename. Stale empty `src/Server/{Server,Admin}` + `tests/Server/{Server.Tests,Admin.Tests,Admin.E2ETests}` directories deleted (no source, absent from `.slnx`).
|
||||
- **2026-04-24** — Phase 5 driver complement closed (task #120 CLOSED). AB CIP, AB Legacy, TwinCAT, FOCAS all shipped. FOCAS migration: retired the Tier-C split (`Driver.FOCAS.Host` + `Driver.FOCAS.Shared` + `FwlibNative` + shim DLL deleted) in favour of a pure-managed in-process `FocasWireClient` inlined into `Driver.FOCAS`; driver is now read-only against the CNC by design. Integration test matrix grew to cover Browse / Subscribe / IAlarmSource / Probe end-to-end.
|
||||
- **2026-04-23** — Phase 6.4 audit close-out. IdentificationFolderBuilder + OPC 40010 Identification folder verified against the shipped code.
|
||||
- **2026-04-20** — Phase 7 plan drafted (`phase-7-scripting-and-alarming.md`, `phase-7-e2e-smoke.md`). Out of scope for v2 GA.
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,46 +1,63 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Registers the v2 Windows services on a node: OtOpcUa (main server, net10) and
|
||||
optionally OtOpcUaWonderwareHistorian (Wonderware historian sidecar).
|
||||
Registers the v2 Windows service on a node: OtOpcUaHost (fused binary, .NET 10)
|
||||
and optionally OtOpcUaWonderwareHistorian (Wonderware historian sidecar, net48 x86).
|
||||
|
||||
.DESCRIPTION
|
||||
PR 7.2 retired the legacy out-of-process OtOpcUaGalaxyHost service alongside the
|
||||
GalaxyProxyDriver / GalaxyHost / GalaxyShared projects. Galaxy access now flows
|
||||
through the in-process GalaxyDriver talking gRPC to a separately-installed
|
||||
mxaccessgw. The mxaccessgw server runs out of its own repo
|
||||
(`c:\Users\dohertj2\Desktop\mxaccessgw\`) — see
|
||||
`docs/v2/Galaxy.ParityRig.md` for the gw setup recipe.
|
||||
v2 consolidates the legacy OtOpcUa + OtOpcUaAdmin services into a single role-gated
|
||||
OtOpcUaHost binary. The -Roles parameter sets the OTOPCUA_ROLES service env so
|
||||
Program.cs decides what to mount (admin / driver / both). The Wonderware historian
|
||||
sidecar logic is unchanged from v1; install it with -InstallWonderwareHistorian.
|
||||
|
||||
Galaxy access flows through the mxaccessgw sibling repo (separate service); see
|
||||
docs/v2/Galaxy.ParityRig.md for the gateway setup.
|
||||
|
||||
.PARAMETER InstallRoot
|
||||
Where the binaries live (typically C:\Program Files\OtOpcUa).
|
||||
Where the binaries live (typically C:\Program Files\OtOpcUa). The OtOpcUaHost
|
||||
service runs OtOpcUa.Host.exe from this directory; publish the Host project there
|
||||
with `dotnet publish -c Release -r win-x64 --self-contained` first.
|
||||
|
||||
.PARAMETER ServiceAccount
|
||||
Service account SID or DOMAIN\name. The OtOpcUa service runs under this account.
|
||||
Service account SID or DOMAIN\name. The OtOpcUaHost service runs under this account.
|
||||
|
||||
.PARAMETER Roles
|
||||
Comma-separated cluster roles for this node. One of:
|
||||
- "admin,driver" — single-node dev or all-in-one production node
|
||||
- "admin" — admin-only HA pair member (Blazor + control-plane singletons)
|
||||
- "driver" — driver-only node (OPC UA endpoint + per-node actors)
|
||||
Written to the service env as OTOPCUA_ROLES.
|
||||
|
||||
.PARAMETER HttpPort
|
||||
HTTP port for the AdminUI + auth endpoints. Default 9000. Written as ASPNETCORE_URLS.
|
||||
Ignored on driver-only nodes (no Blazor surface).
|
||||
|
||||
.PARAMETER InstallWonderwareHistorian
|
||||
Gate the OtOpcUaWonderwareHistorian sidecar install. Off by default; set when
|
||||
the deployment uses the Wonderware historian for history reads + alarm-event
|
||||
persistence.
|
||||
Gate the OtOpcUaWonderwareHistorian sidecar install. Off by default; set when the
|
||||
deployment uses the Wonderware historian for history reads + alarm-event persistence.
|
||||
|
||||
.PARAMETER HistorianSharedSecret
|
||||
Per-process secret passed to the Historian sidecar via env var. Generated
|
||||
freshly per install when not supplied.
|
||||
Per-process secret passed to the historian sidecar via env var. Generated freshly
|
||||
per install when not supplied.
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' -ServiceAccount 'OTOPCUA\svc-otopcua'
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' `
|
||||
-ServiceAccount 'OTOPCUA\svc-otopcua' -Roles 'admin,driver'
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' -ServiceAccount 'OTOPCUA\svc-otopcua' `
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' `
|
||||
-ServiceAccount 'OTOPCUA\svc-otopcua' -Roles 'driver' `
|
||||
-InstallWonderwareHistorian
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[Parameter(Mandatory)] [string]$InstallRoot,
|
||||
[Parameter(Mandatory)] [string]$ServiceAccount,
|
||||
[Parameter(Mandatory)] [ValidateSet('admin', 'driver', 'admin,driver', 'driver,admin')]
|
||||
[string]$Roles,
|
||||
[int]$HttpPort = 9000,
|
||||
|
||||
# PR 3.W — Wonderware historian sidecar. Optional; gates the
|
||||
# OtOpcUaWonderwareHistorian service. Secret + pipe defaults match the server's
|
||||
# Historian:Wonderware appsettings block.
|
||||
# Wonderware historian sidecar. Optional; gates the OtOpcUaWonderwareHistorian
|
||||
# service. Secret + pipe defaults match the server's Historian:Wonderware appsettings.
|
||||
[switch]$InstallWonderwareHistorian,
|
||||
[string]$HistorianSharedSecret,
|
||||
[string]$HistorianPipeName = 'OtOpcUaWonderwareHistorian',
|
||||
@@ -51,18 +68,19 @@ param(
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Test-Path "$InstallRoot\OtOpcUa.Server.exe")) {
|
||||
Write-Error "OtOpcUa.Server.exe not found at $InstallRoot — copy the publish output first"
|
||||
if (-not (Test-Path "$InstallRoot\OtOpcUa.Host.exe")) {
|
||||
Write-Error "OtOpcUa.Host.exe not found at $InstallRoot — copy the publish output first"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Generate fresh shared secrets per install if not supplied.
|
||||
function New-SharedSecret {
|
||||
$bytes = New-Object byte[] 32
|
||||
[System.Security.Cryptography.RandomNumberGenerator]::Create().GetBytes($bytes)
|
||||
return [Convert]::ToBase64String($bytes)
|
||||
}
|
||||
if ($InstallWonderwareHistorian -and -not $HistorianSharedSecret) { $HistorianSharedSecret = New-SharedSecret }
|
||||
if ($InstallWonderwareHistorian -and -not $HistorianSharedSecret) {
|
||||
$HistorianSharedSecret = New-SharedSecret
|
||||
}
|
||||
|
||||
if ($InstallWonderwareHistorian -and -not (Test-Path "$InstallRoot\WonderwareHistorian\OtOpcUa.Driver.Historian.Wonderware.exe")) {
|
||||
Write-Error "OtOpcUa.Driver.Historian.Wonderware.exe not found at $InstallRoot\WonderwareHistorian — copy the publish output first"
|
||||
@@ -76,10 +94,7 @@ $sid = if ($ServiceAccount.StartsWith('S-1-')) {
|
||||
(New-Object System.Security.Principal.NTAccount $ServiceAccount).Translate([System.Security.Principal.SecurityIdentifier]).Value
|
||||
}
|
||||
|
||||
# --- Install OtOpcUaWonderwareHistorian (PR 3.W) — separate sidecar that exposes the
|
||||
# Wonderware Historian SDK via a named-pipe protocol consumed by the .NET 10 server.
|
||||
# Optional: only installed when -InstallWonderwareHistorian is supplied. Depends on the
|
||||
# hard AVEVA services that host the historian SDK runtime path.
|
||||
# --- OtOpcUaWonderwareHistorian sidecar (optional, unchanged from v1) -------
|
||||
$historianDepend = $null
|
||||
if ($InstallWonderwareHistorian) {
|
||||
$historianEnv = @(
|
||||
@@ -87,14 +102,10 @@ if ($InstallWonderwareHistorian) {
|
||||
"OTOPCUA_ALLOWED_SID=$sid"
|
||||
"OTOPCUA_HISTORIAN_SECRET=$HistorianSharedSecret"
|
||||
"OTOPCUA_HISTORIAN_ENABLED=true"
|
||||
# Default-on when the historian sidecar is installed; flip to false for a
|
||||
# read-only deployment that still loads aahClientManaged for reads but
|
||||
# rejects WriteAlarmEvents frames.
|
||||
"OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED=true"
|
||||
"OTOPCUA_HISTORIAN_SERVER=$HistorianServer"
|
||||
"OTOPCUA_HISTORIAN_PORT=$HistorianPort"
|
||||
) -join "`0"
|
||||
$historianEnv += "`0`0"
|
||||
)
|
||||
|
||||
Write-Host "Installing OtOpcUaWonderwareHistorian..."
|
||||
& sc.exe create OtOpcUaWonderwareHistorian binPath= "`"$InstallRoot\WonderwareHistorian\OtOpcUa.Driver.Historian.Wonderware.exe`"" `
|
||||
@@ -105,36 +116,59 @@ if ($InstallWonderwareHistorian) {
|
||||
& sc.exe config OtOpcUaWonderwareHistorian start= delayed-auto | Out-Null
|
||||
|
||||
$svcKey = "HKLM:\SYSTEM\CurrentControlSet\Services\OtOpcUaWonderwareHistorian"
|
||||
$envValue = $historianEnv.Split("`0") | Where-Object { $_ -ne '' }
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $envValue
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $historianEnv
|
||||
|
||||
& sc.exe failure OtOpcUaWonderwareHistorian reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
$historianDepend = 'OtOpcUaWonderwareHistorian'
|
||||
}
|
||||
|
||||
# --- Install OtOpcUa. Galaxy access flows through GalaxyDriver → mxaccessgw (gRPC),
|
||||
# so OtOpcUa no longer depends on a sibling service for Galaxy connectivity. The
|
||||
# mxaccessgw is installed separately. When the Wonderware sidecar is installed,
|
||||
# depend on it for startup ordering.
|
||||
$otOpcUaDepends = @()
|
||||
if ($historianDepend) { $otOpcUaDepends += $historianDepend }
|
||||
# --- OtOpcUaHost (the fused v2 binary) --------------------------------------
|
||||
$normalisedRoles = ($Roles -split ',' | ForEach-Object { $_.Trim() } | Sort-Object -Unique) -join ','
|
||||
|
||||
Write-Host "Installing OtOpcUa..."
|
||||
$hasAdmin = $normalisedRoles -split ',' -contains 'admin'
|
||||
|
||||
$hostEnv = @(
|
||||
"OTOPCUA_ROLES=$normalisedRoles",
|
||||
'DOTNET_ENVIRONMENT=Production'
|
||||
)
|
||||
if ($hasAdmin) {
|
||||
$hostEnv += "ASPNETCORE_URLS=http://+:$HttpPort"
|
||||
}
|
||||
|
||||
$hostDepends = @()
|
||||
if ($historianDepend) { $hostDepends += $historianDepend }
|
||||
|
||||
Write-Host "Installing OtOpcUaHost (roles=$normalisedRoles)..."
|
||||
$createArgs = @(
|
||||
'create', 'OtOpcUa',
|
||||
'binPath=', "`"$InstallRoot\OtOpcUa.Server.exe`"",
|
||||
'DisplayName=', 'OtOpcUa Server',
|
||||
'create', 'OtOpcUaHost',
|
||||
'binPath=', "`"$InstallRoot\OtOpcUa.Host.exe`"",
|
||||
'DisplayName=', "OtOpcUa Host ($normalisedRoles)",
|
||||
'start=', 'auto',
|
||||
'obj=', $ServiceAccount
|
||||
)
|
||||
if ($otOpcUaDepends.Count -gt 0) {
|
||||
$createArgs += @('depend=', ($otOpcUaDepends -join '/'))
|
||||
if ($hostDepends.Count -gt 0) {
|
||||
$createArgs += @('depend=', ($hostDepends -join '/'))
|
||||
}
|
||||
& sc.exe @createArgs | Out-Null
|
||||
|
||||
# Env block via registry MultiString (sc.exe doesn't take env directly).
|
||||
$svcKey = "HKLM:\SYSTEM\CurrentControlSet\Services\OtOpcUaHost"
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $hostEnv
|
||||
|
||||
# Restart-on-failure: 5s, 30s, 60s; reset counter after a clean 24h run.
|
||||
& sc.exe failure OtOpcUaHost reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Installed. Start with:"
|
||||
Write-Host "Installed OtOpcUaHost:"
|
||||
Write-Host " Roles: $normalisedRoles"
|
||||
if ($hasAdmin) { Write-Host " HTTP port: $HttpPort" }
|
||||
Write-Host " Binary: $InstallRoot\OtOpcUa.Host.exe"
|
||||
Write-Host " Account: $ServiceAccount"
|
||||
Write-Host ""
|
||||
Write-Host "Start with:"
|
||||
if ($InstallWonderwareHistorian) { Write-Host " sc.exe start OtOpcUaWonderwareHistorian" }
|
||||
Write-Host " sc.exe start OtOpcUa"
|
||||
Write-Host " sc.exe start OtOpcUaHost"
|
||||
if ($InstallWonderwareHistorian) {
|
||||
Write-Host ""
|
||||
Write-Host "Wonderware historian shared secret (configure into appsettings.json Historian:Wonderware:SharedSecret):"
|
||||
@@ -142,5 +176,5 @@ if ($InstallWonderwareHistorian) {
|
||||
}
|
||||
Write-Host ""
|
||||
Write-Host "NOTE: Galaxy access flows through mxaccessgw — install + run that separately"
|
||||
Write-Host " per docs/v2/Galaxy.ParityRig.md. OtOpcUa connects via the Galaxy.Gateway"
|
||||
Write-Host " section of appsettings.json (default endpoint http://localhost:5120)."
|
||||
Write-Host " per docs/v2/Galaxy.ParityRig.md. OtOpcUaHost connects via the"
|
||||
Write-Host " Galaxy.Gateway section of appsettings.json (default http://localhost:5120)."
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Installs Traefik as a Windows service that routes admin HTTP traffic to whichever
|
||||
OtOpcUa.Host node holds the admin role-leader (via /health/active).
|
||||
|
||||
.DESCRIPTION
|
||||
Downloads the Traefik Windows binary into $InstallRoot, drops traefik.yml +
|
||||
traefik-dynamic.yml from this directory next to it, and registers Traefik as a
|
||||
Windows service via sc.exe with restart-on-failure.
|
||||
|
||||
Companion to Install-Services.ps1. Run on the box that fronts the admin HTTP
|
||||
traffic (typically a separate node from OtOpcUaHost, or co-located on the
|
||||
primary admin node).
|
||||
|
||||
.PARAMETER InstallRoot
|
||||
Where the Traefik binary + config land. Default 'C:\Program Files\Traefik'.
|
||||
|
||||
.PARAMETER TraefikVersion
|
||||
Traefik version to download. Default 'v3.1.6'.
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Traefik.ps1 -InstallRoot 'C:\Program Files\Traefik'
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[string]$InstallRoot = 'C:\Program Files\Traefik',
|
||||
[string]$TraefikVersion = 'v3.1.6'
|
||||
)
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Test-Path $InstallRoot)) {
|
||||
New-Item -ItemType Directory -Path $InstallRoot | Out-Null
|
||||
}
|
||||
|
||||
$zip = Join-Path $env:TEMP "traefik-$TraefikVersion.zip"
|
||||
$url = "https://github.com/traefik/traefik/releases/download/$TraefikVersion/traefik_${TraefikVersion}_windows_amd64.zip"
|
||||
|
||||
Write-Host "Downloading Traefik $TraefikVersion..."
|
||||
Invoke-WebRequest -Uri $url -OutFile $zip
|
||||
Expand-Archive -Path $zip -DestinationPath $InstallRoot -Force
|
||||
Remove-Item $zip
|
||||
|
||||
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
|
||||
Copy-Item -Force (Join-Path $scriptDir 'traefik.yml') $InstallRoot
|
||||
Copy-Item -Force (Join-Path $scriptDir 'traefik-dynamic.yml') (Join-Path $InstallRoot 'dynamic.yml')
|
||||
|
||||
# Traefik reads dynamic.yml from /etc/traefik on Linux; on Windows place it next to the
|
||||
# binary and point the file provider at it. Edit traefik.yml's `filename:` if you want
|
||||
# to change the location.
|
||||
(Get-Content -Raw (Join-Path $InstallRoot 'traefik.yml')) `
|
||||
-replace '/etc/traefik/dynamic.yml', (Join-Path $InstallRoot 'dynamic.yml').Replace('\', '/') `
|
||||
| Set-Content (Join-Path $InstallRoot 'traefik.yml')
|
||||
|
||||
Write-Host "Installing Traefik Windows service..."
|
||||
& sc.exe create OtOpcUaTraefik binPath= "`"$InstallRoot\traefik.exe`" --configFile=`"$InstallRoot\traefik.yml`"" `
|
||||
DisplayName= 'OtOpcUa Traefik (admin HTTP front door)' `
|
||||
start= auto | Out-Null
|
||||
|
||||
& sc.exe failure OtOpcUaTraefik reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Installed OtOpcUaTraefik. Edit:"
|
||||
Write-Host " $InstallRoot\dynamic.yml (router + service definitions)"
|
||||
Write-Host "Start with:"
|
||||
Write-Host " sc.exe start OtOpcUaTraefik"
|
||||
Write-Host ""
|
||||
Write-Host "Traefik dashboard: http://localhost:8080 (turn off api.insecure in production)"
|
||||
@@ -43,11 +43,11 @@ function Test-NssmService([string]$Name) {
|
||||
# Step 1: Stop in reverse dependency order
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Stopping services (OtOpcUa → OtOpcUaWonderwareHistorian → MxAccessGw)"
|
||||
Step "Stopping services (OtOpcUaHost > OtOpcUaWonderwareHistorian > MxAccessGw)"
|
||||
|
||||
foreach ($name in @('OtOpcUa', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
foreach ($name in @('OtOpcUaHost', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
if (Test-NssmService $name) {
|
||||
Run { nssm stop $name } "stop $name"
|
||||
Run { Stop-Service $name -Force -ErrorAction SilentlyContinue } "stop $name"
|
||||
}
|
||||
else {
|
||||
Write-Host " ($name not installed; skipping)" -ForegroundColor DarkGray
|
||||
@@ -56,7 +56,7 @@ foreach ($name in @('OtOpcUa', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
|
||||
if (-not $WhatIf) {
|
||||
Start-Sleep -Seconds 3
|
||||
Get-Process MxGateway.Server, MxGateway.Worker, OtOpcUa.Server, OtOpcUa.Driver.Historian.Wonderware -ErrorAction SilentlyContinue |
|
||||
Get-Process MxGateway.Server, MxGateway.Worker, OtOpcUa.Host, OtOpcUa.Driver.Historian.Wonderware -ErrorAction SilentlyContinue |
|
||||
ForEach-Object {
|
||||
Write-Host " killing residual process $($_.ProcessName) (PID=$($_.Id))" -ForegroundColor DarkYellow
|
||||
Stop-Process -Id $_.Id -Force -ErrorAction SilentlyContinue
|
||||
@@ -109,14 +109,14 @@ Run {
|
||||
# Step 4: Refresh OtOpcUa + Wonderware historian sidecar
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Publishing OtOpcUa server + Wonderware historian sidecar from $RepoRoot"
|
||||
Step "Publishing OtOpcUa.Host + Wonderware historian sidecar from $RepoRoot"
|
||||
|
||||
Run {
|
||||
& dotnet publish "$RepoRoot\src\Server\ZB.MOM.WW.OtOpcUa.Server" `
|
||||
& dotnet publish "$RepoRoot\src\Server\ZB.MOM.WW.OtOpcUa.Host" `
|
||||
-c Release -o (Join-Path $PublishRoot "lmxopcua") | Out-Null
|
||||
& dotnet publish "$RepoRoot\src\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware" `
|
||||
-c Release -o (Join-Path $PublishRoot "lmxopcua\WonderwareHistorian") | Out-Null
|
||||
} "dotnet publish (Server + sidecar)"
|
||||
} "dotnet publish (Host + sidecar)"
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Step 5: Service env block — ensure OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED
|
||||
@@ -143,16 +143,16 @@ if (Test-NssmService 'OtOpcUaWonderwareHistorian') {
|
||||
# Step 6: Start in forward dependency order
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Starting services (MxAccessGw → OtOpcUaWonderwareHistorian → OtOpcUa)"
|
||||
Step "Starting services (MxAccessGw > OtOpcUaWonderwareHistorian > OtOpcUaHost)"
|
||||
|
||||
foreach ($pair in @(
|
||||
@{ Name = 'MxAccessGw'; Wait = 4 },
|
||||
@{ Name = 'OtOpcUaWonderwareHistorian'; Wait = 4 },
|
||||
@{ Name = 'OtOpcUa'; Wait = 8 }
|
||||
@{ Name = 'OtOpcUaHost'; Wait = 8 }
|
||||
)) {
|
||||
$name = $pair.Name
|
||||
if (Test-NssmService $name) {
|
||||
Run { nssm start $name } "start $name"
|
||||
Run { Start-Service $name } "start $name"
|
||||
if (-not $WhatIf) { Start-Sleep -Seconds $pair.Wait }
|
||||
}
|
||||
else {
|
||||
@@ -167,7 +167,7 @@ foreach ($pair in @(
|
||||
Step "Smoke verification"
|
||||
|
||||
if (-not $WhatIf) {
|
||||
foreach ($name in @('MxAccessGw', 'OtOpcUaWonderwareHistorian', 'OtOpcUa')) {
|
||||
foreach ($name in @('MxAccessGw', 'OtOpcUaWonderwareHistorian', 'OtOpcUaHost')) {
|
||||
if (Test-NssmService $name) {
|
||||
$status = (Get-Service $name).Status
|
||||
$color = if ($status -eq 'Running') { 'Green' } else { 'Red' }
|
||||
|
||||
@@ -3,16 +3,17 @@
|
||||
Stops + removes the v2 services. Mirrors Install-Services.ps1.
|
||||
|
||||
.DESCRIPTION
|
||||
PR 7.2 retired the legacy OtOpcUaGalaxyHost service. Galaxy access now flows
|
||||
through the in-process GalaxyDriver against a separately-installed mxaccessgw.
|
||||
OtOpcUaGalaxyHost is included in the cleanup loop below so this script safely
|
||||
removes it from any rig still carrying the legacy service from a pre-7.2
|
||||
install.
|
||||
Removes the v2 OtOpcUaHost service plus the optional OtOpcUaWonderwareHistorian
|
||||
sidecar. Also cleans up legacy service names from prior installs:
|
||||
- OtOpcUa (v1 server) — replaced by OtOpcUaHost in v2
|
||||
- OtOpcUaAdmin (v1 admin) — fused into OtOpcUaHost in v2
|
||||
- OtOpcUaGalaxyHost (pre-7.2 Galaxy host) — long-retired
|
||||
#>
|
||||
[CmdletBinding()] param()
|
||||
$ErrorActionPreference = 'Continue'
|
||||
|
||||
foreach ($svc in 'OtOpcUa', 'OtOpcUaWonderwareHistorian', 'OtOpcUaGalaxyHost') {
|
||||
foreach ($svc in 'OtOpcUaHost', 'OtOpcUaWonderwareHistorian',
|
||||
'OtOpcUa', 'OtOpcUaAdmin', 'OtOpcUaGalaxyHost') {
|
||||
if (Get-Service $svc -ErrorAction SilentlyContinue) {
|
||||
Write-Host "Stopping $svc..."
|
||||
Stop-Service $svc -Force -ErrorAction SilentlyContinue
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
# Dynamic (file-provider) Traefik config for the OtOpcUa admin HTTP routing.
|
||||
# Picked up by traefik.yml's file provider (with watch: true) so router/service
|
||||
# edits hot-reload without a Traefik restart.
|
||||
|
||||
http:
|
||||
routers:
|
||||
otopcua-admin:
|
||||
entryPoints: ["web"]
|
||||
rule: "HostRegexp(`otopcua.*`)"
|
||||
service: otopcua-admin
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
# Default expected status is 2xx. Followers return 503 from
|
||||
# /health/active so Traefik will drop them from the balancer
|
||||
# within the next interval after a leadership change.
|
||||
@@ -0,0 +1,30 @@
|
||||
# Traefik static configuration for the OtOpcUa fleet HTTP front door.
|
||||
#
|
||||
# Routes admin-role HTTP traffic (Blazor + auth + SignalR + /auth/*) to whichever
|
||||
# OtOpcUa.Host node currently holds the admin role-leader. Uses the /health/active
|
||||
# endpoint as the active-leader signal: a node returns 200 only when it is the
|
||||
# Akka admin role-leader; followers return 503 and Traefik routes around them.
|
||||
#
|
||||
# OPC UA traffic is NOT routed through Traefik — clients connect directly to
|
||||
# opc.tcp://node:4840 on every driver node and use the standard ServiceLevel
|
||||
# heuristic for failover.
|
||||
|
||||
entryPoints:
|
||||
web:
|
||||
address: ":80"
|
||||
|
||||
providers:
|
||||
file:
|
||||
filename: /etc/traefik/dynamic.yml
|
||||
watch: true
|
||||
|
||||
api:
|
||||
insecure: true
|
||||
dashboard: true
|
||||
|
||||
log:
|
||||
level: INFO
|
||||
format: common
|
||||
|
||||
accessLog:
|
||||
format: common
|
||||
@@ -0,0 +1,59 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Idempotent migration runner that takes the OtOpcUaConfig database from the v1 schema
|
||||
(with ConfigGeneration / ClusterNodeGenerationState) to the v2 hosting-aligned schema
|
||||
(with Deployment / NodeDeploymentState / ConfigEdit / DataProtectionKeys).
|
||||
|
||||
.DESCRIPTION
|
||||
Backs the database up, applies the idempotent EF migration script, then validates that
|
||||
expected tables exist and legacy tables are gone. Safe to re-run — the EF script itself
|
||||
is idempotent, and the backup picks a unique filename per invocation.
|
||||
|
||||
.PARAMETER ConnectionString
|
||||
Mandatory. Full ADO.NET connection string with permissions to BACKUP DATABASE and
|
||||
apply DDL on the target ConfigDb.
|
||||
|
||||
.PARAMETER BackupPath
|
||||
Optional. Full path for the backup file. Defaults to a timestamped path under $env:TEMP.
|
||||
|
||||
.EXAMPLE
|
||||
.\Migrate-To-V2.ps1 -ConnectionString "Server=sql01;Database=OtOpcUaConfig;Trusted_Connection=True;TrustServerCertificate=True"
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[Parameter(Mandatory)][string] $ConnectionString,
|
||||
[string] $BackupPath = "$env:TEMP\OtOpcUa-V1-Backup-$(Get-Date -Format yyyyMMddHHmmss).bak"
|
||||
)
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Get-Command Invoke-Sqlcmd -ErrorAction SilentlyContinue)) {
|
||||
throw "Invoke-Sqlcmd not available. Install module: Install-Module SqlServer -Scope CurrentUser"
|
||||
}
|
||||
|
||||
Write-Host "Step 1/4 — Backup ConfigDb to $BackupPath" -ForegroundColor Cyan
|
||||
Invoke-Sqlcmd -ConnectionString $ConnectionString `
|
||||
-Query "BACKUP DATABASE [OtOpcUaConfig] TO DISK = '$BackupPath' WITH FORMAT, COMPRESSION"
|
||||
|
||||
Write-Host "Step 2/4 — Row counts (before)" -ForegroundColor Cyan
|
||||
$beforeCounts = Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\count-rows.sql"
|
||||
$beforeCounts | Format-Table
|
||||
|
||||
Write-Host "Step 3/4 — Apply Migrate-To-V2.sql" -ForegroundColor Cyan
|
||||
Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\Migrate-To-V2.sql" -QueryTimeout 1800
|
||||
|
||||
Write-Host "Step 4/4 — Row counts (after) + validation" -ForegroundColor Cyan
|
||||
$afterCounts = Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\count-rows.sql"
|
||||
$afterCounts | Format-Table
|
||||
|
||||
$tablesNow = (Invoke-Sqlcmd -ConnectionString $ConnectionString `
|
||||
-Query "SELECT name FROM sys.tables ORDER BY name").name
|
||||
|
||||
foreach ($t in 'Deployment','NodeDeploymentState','ConfigEdit','DataProtectionKeys') {
|
||||
if ($tablesNow -notcontains $t) { throw "Expected v2 table $t missing." }
|
||||
}
|
||||
foreach ($t in 'ConfigGeneration','ClusterNodeGenerationState') {
|
||||
if ($tablesNow -contains $t) { throw "Legacy v1 table $t still present." }
|
||||
}
|
||||
|
||||
Write-Host "Migration complete. Backup at $BackupPath" -ForegroundColor Green
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
-- Per-table row counts for pre/post-migration audit.
|
||||
-- Covers every table relevant to the v1 -> v2 transition so the operator can confirm
|
||||
-- live-edit data was preserved and v2 tables came up empty.
|
||||
|
||||
SELECT TableName = t.name, [Rows] = SUM(p.[rows])
|
||||
FROM sys.tables t
|
||||
JOIN sys.partitions p ON p.object_id = t.object_id AND p.index_id IN (0,1)
|
||||
WHERE t.name IN (
|
||||
-- Live-edit configuration (rows must survive)
|
||||
'ServerCluster','ClusterNode','ClusterNodeCredential',
|
||||
'Namespace','UnsArea','UnsLine',
|
||||
'DriverInstance','Device','Equipment','Tag','PollGroup','VirtualTag',
|
||||
'NodeAcl','ExternalIdReservation',
|
||||
'Script','ScriptedAlarm','ScriptedAlarmState',
|
||||
'LdapGroupRoleMapping',
|
||||
'EquipmentImportBatch','EquipmentImportRow',
|
||||
-- Status tables (rebuilt at runtime; counts informational)
|
||||
'DriverHostStatus','DriverInstanceResilienceStatus',
|
||||
-- Audit (preserved)
|
||||
'ConfigAuditLog',
|
||||
-- v2 deploy model (empty pre-migration, populated post)
|
||||
'Deployment','NodeDeploymentState','ConfigEdit','DataProtectionKeys'
|
||||
)
|
||||
GROUP BY t.name
|
||||
ORDER BY t.name;
|
||||
GO
|
||||
@@ -109,8 +109,16 @@ public abstract class CommandBase : ICommand
|
||||
/// <summary>
|
||||
/// Configures Serilog based on the verbose flag.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Disposes the previously assigned <see cref="Log.Logger" /> via <see cref="Log.CloseAndFlush" />
|
||||
/// before installing the new one, so repeated CLI invocations (e.g. in the test suite) do not
|
||||
/// leak the prior logger's console sink.
|
||||
/// </remarks>
|
||||
protected void ConfigureLogging()
|
||||
{
|
||||
// Dispose any previously installed logger before swapping in a new one.
|
||||
Log.CloseAndFlush();
|
||||
|
||||
var config = new LoggerConfiguration();
|
||||
if (Verbose)
|
||||
config.MinimumLevel.Debug()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
using System.Threading.Channels;
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.Shared;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.Shared.Models;
|
||||
@@ -40,17 +42,29 @@ public class AlarmsCommand : CommandBase
|
||||
/// Connects to the server, subscribes to alarm events, and streams operator-facing alarm state changes to the console.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
if (Interval <= 0)
|
||||
throw new CommandException($"--interval must be greater than 0 (was {Interval}).");
|
||||
|
||||
NodeId? sourceNodeId;
|
||||
try
|
||||
{
|
||||
sourceNodeId = NodeIdParser.Parse(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var sourceNodeId = NodeIdParser.Parse(NodeId);
|
||||
|
||||
// Channel serialises SDK notification-thread writes to the main async loop so
|
||||
// that concurrent alarm callbacks never interleave on the shared TextWriter.
|
||||
var outputChannel = Channel.CreateUnbounded<string>(
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
@@ -35,20 +36,29 @@ public class BrowseCommand : CommandBase
|
||||
[CommandOption("recursive", 'r', Description = "Browse recursively (uses --depth as max depth)")]
|
||||
public bool Recursive { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Connects to the server and prints a tree view of the requested address-space branch.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
if (Depth <= 0)
|
||||
throw new CommandException($"--depth must be greater than 0 (was {Depth}).");
|
||||
|
||||
NodeId? startNode;
|
||||
try
|
||||
{
|
||||
startNode = NodeIdParser.Parse(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var startNode = NodeIdParser.Parse(NodeId);
|
||||
var maxDepth = Recursive ? Depth : 1;
|
||||
|
||||
await BrowseNodeAsync(service, console, startNode, maxDepth, 0, ct);
|
||||
|
||||
@@ -15,10 +15,7 @@ public class ConnectCommand : CommandBase
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Connects to the server and prints the negotiated endpoint details for operator verification.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System.Globalization;
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
@@ -55,29 +56,69 @@ public class HistoryReadCommand : CommandBase
|
||||
[CommandOption("interval", Description = "Processing interval in milliseconds for aggregates")]
|
||||
public double IntervalMs { get; init; } = 3600000;
|
||||
|
||||
/// <summary>
|
||||
/// Connects to the server and prints raw or processed historical values for the requested node.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
if (MaxValues <= 0)
|
||||
throw new CommandException($"--max must be greater than 0 (was {MaxValues}).");
|
||||
if (!string.IsNullOrEmpty(Aggregate) && IntervalMs <= 0)
|
||||
throw new CommandException($"--interval must be greater than 0 (was {IntervalMs}).");
|
||||
|
||||
NodeId nodeId;
|
||||
try
|
||||
{
|
||||
nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
DateTime start, end;
|
||||
try
|
||||
{
|
||||
start = string.IsNullOrEmpty(StartTime)
|
||||
? DateTime.UtcNow.AddHours(-24)
|
||||
: DateTime.Parse(StartTime, CultureInfo.InvariantCulture,
|
||||
DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
|
||||
}
|
||||
catch (FormatException ex)
|
||||
{
|
||||
throw new CommandException($"Invalid --start value '{StartTime}': {ex.Message}. Expected ISO 8601 UTC format, e.g. 2026-01-15T08:00:00Z.");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
end = string.IsNullOrEmpty(EndTime)
|
||||
? DateTime.UtcNow
|
||||
: DateTime.Parse(EndTime, CultureInfo.InvariantCulture,
|
||||
DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
|
||||
}
|
||||
catch (FormatException ex)
|
||||
{
|
||||
throw new CommandException($"Invalid --end value '{EndTime}': {ex.Message}. Expected ISO 8601 UTC format, e.g. 2026-01-15T08:00:00Z.");
|
||||
}
|
||||
|
||||
AggregateType aggregateType = default;
|
||||
if (!string.IsNullOrEmpty(Aggregate))
|
||||
{
|
||||
try
|
||||
{
|
||||
aggregateType = ParseAggregateType(Aggregate);
|
||||
}
|
||||
catch (ArgumentException ex)
|
||||
{
|
||||
throw new CommandException($"Invalid --aggregate value: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
var start = string.IsNullOrEmpty(StartTime)
|
||||
? DateTime.UtcNow.AddHours(-24)
|
||||
: DateTime.Parse(StartTime, CultureInfo.InvariantCulture,
|
||||
DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
|
||||
var end = string.IsNullOrEmpty(EndTime)
|
||||
? DateTime.UtcNow
|
||||
: DateTime.Parse(EndTime, CultureInfo.InvariantCulture,
|
||||
DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
|
||||
|
||||
IReadOnlyList<DataValue> values;
|
||||
|
||||
if (string.IsNullOrEmpty(Aggregate))
|
||||
@@ -88,7 +129,6 @@ public class HistoryReadCommand : CommandBase
|
||||
}
|
||||
else
|
||||
{
|
||||
var aggregateType = ParseAggregateType(Aggregate);
|
||||
await console.Output.WriteLineAsync(
|
||||
$"History for {NodeId} ({Aggregate}, interval={IntervalMs}ms)");
|
||||
values = await service.HistoryReadAggregateAsync(
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.Shared;
|
||||
|
||||
@@ -22,20 +24,27 @@ public class ReadCommand : CommandBase
|
||||
[CommandOption("node", 'n', Description = "Node ID (e.g. ns=2;s=MyNode)", IsRequired = true)]
|
||||
public string NodeId { get; init; } = default!;
|
||||
|
||||
/// <summary>
|
||||
/// Connects to the server and prints the current value, status, and timestamps for the requested node.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
|
||||
NodeId nodeId;
|
||||
try
|
||||
{
|
||||
nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
var value = await service.ReadValueAsync(nodeId, ct);
|
||||
|
||||
await console.Output.WriteLineAsync($"Node: {NodeId}");
|
||||
|
||||
@@ -15,10 +15,8 @@ public class RedundancyCommand : CommandBase
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Connects to the server and prints redundancy mode, service level, and partner-server identity data.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <summary>Connects to the server and prints redundancy mode, service level, and partner-server identity data.</summary>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading.Channels;
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
@@ -12,42 +13,88 @@ namespace ZB.MOM.WW.OtOpcUa.Client.CLI.Commands;
|
||||
[Command("subscribe", Description = "Monitor a node for value changes")]
|
||||
public class SubscribeCommand : CommandBase
|
||||
{
|
||||
/// <summary>
|
||||
/// Creates the subscribe command used to monitor a node (or a subtree of nodes) for data-change
|
||||
/// notifications.
|
||||
/// </summary>
|
||||
/// <param name="factory">The factory that creates the shared client service for the command run.</param>
|
||||
public SubscribeCommand(IOpcUaClientServiceFactory factory) : base(factory)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the node ID to monitor. When <see cref="Recursive" /> is set, this node is the browse root
|
||||
/// and every <c>Variable</c> child it reaches is subscribed.
|
||||
/// </summary>
|
||||
[CommandOption("node", 'n', Description = "Node ID to monitor", IsRequired = true)]
|
||||
public string NodeId { get; init; } = default!;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the sampling interval, in milliseconds, requested for every monitored item.
|
||||
/// </summary>
|
||||
[CommandOption("interval", 'i', Description = "Sampling interval in milliseconds")]
|
||||
public int Interval { get; init; } = 1000;
|
||||
|
||||
/// <summary>
|
||||
/// Gets a value indicating whether the command should browse from <see cref="NodeId" />
|
||||
/// and subscribe to every <c>Variable</c> in the subtree.
|
||||
/// </summary>
|
||||
[CommandOption("recursive", 'r', Description = "Browse recursively from --node and subscribe to every Variable found")]
|
||||
public bool Recursive { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the maximum recursion depth applied while collecting variables when <see cref="Recursive" /> is set.
|
||||
/// </summary>
|
||||
[CommandOption("max-depth", Description = "Maximum recursion depth when --recursive is set")]
|
||||
public int MaxDepth { get; init; } = 10;
|
||||
|
||||
/// <summary>
|
||||
/// Gets a value indicating whether per-update lines should be suppressed in favour of the final summary only.
|
||||
/// </summary>
|
||||
[CommandOption("quiet", 'q', Description = "Suppress per-update output; only print a final summary on Ctrl+C")]
|
||||
public bool Quiet { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the duration, in seconds, before the command auto-exits and prints its summary.
|
||||
/// A value of <c>0</c> means the command runs until Ctrl+C.
|
||||
/// </summary>
|
||||
[CommandOption("duration", Description = "Auto-exit after N seconds and print summary (0 = run until Ctrl+C)")]
|
||||
public int DurationSeconds { get; init; } = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the optional path that the command should write the final summary to on exit, in addition to stdout.
|
||||
/// </summary>
|
||||
[CommandOption("summary-file", Description = "Write summary to this file path on exit (in addition to stdout)")]
|
||||
public string? SummaryFile { get; init; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
|
||||
if (Interval <= 0)
|
||||
throw new CommandException($"--interval must be greater than 0 (was {Interval}).");
|
||||
if (Recursive && MaxDepth <= 0)
|
||||
throw new CommandException($"--max-depth must be greater than 0 (was {MaxDepth}).");
|
||||
if (DurationSeconds < 0)
|
||||
throw new CommandException($"--duration must be 0 or a positive number (was {DurationSeconds}).");
|
||||
|
||||
NodeId rootNodeId;
|
||||
try
|
||||
{
|
||||
rootNodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var rootNodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
|
||||
var targets = new List<(NodeId nodeId, string displayPath)>();
|
||||
if (Recursive)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using CliFx.Attributes;
|
||||
using CliFx.Exceptions;
|
||||
using CliFx.Infrastructure;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Client.CLI.Helpers;
|
||||
@@ -34,17 +35,27 @@ public class WriteCommand : CommandBase
|
||||
/// Connects to the server, converts the supplied value to the node's current data type, and issues the write.
|
||||
/// </summary>
|
||||
/// <param name="console">The CLI console used for output and cancellation handling.</param>
|
||||
/// <inheritdoc />
|
||||
public override async ValueTask ExecuteAsync(IConsole console)
|
||||
{
|
||||
ConfigureLogging();
|
||||
|
||||
NodeId nodeId;
|
||||
try
|
||||
{
|
||||
nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
}
|
||||
catch (Exception ex) when (ex is FormatException or ArgumentException)
|
||||
{
|
||||
throw new CommandException($"Invalid --node value: {ex.Message}");
|
||||
}
|
||||
|
||||
IOpcUaClientService? service = null;
|
||||
try
|
||||
{
|
||||
var ct = console.RegisterCancellationHandler();
|
||||
(service, _) = await CreateServiceAndConnectAsync(ct);
|
||||
|
||||
var nodeId = NodeIdParser.ParseRequired(NodeId);
|
||||
|
||||
// Read current value to determine type for conversion
|
||||
var currentValue = await service.ReadValueAsync(nodeId, ct);
|
||||
var typedValue = ValueConverter.ConvertValue(Value, currentValue.Value);
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CliFx" Version="2.3.6"/>
|
||||
<PackageReference Include="Serilog" Version="4.2.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0"/>
|
||||
<PackageReference Include="CliFx"/>
|
||||
<PackageReference Include="Serilog"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
+10
-1
@@ -12,9 +12,18 @@ internal sealed class DefaultApplicationConfigurationFactory : IApplicationConfi
|
||||
{
|
||||
private static readonly ILogger Logger = Log.ForContext<DefaultApplicationConfigurationFactory>();
|
||||
|
||||
/// <summary>Creates an OPC UA application configuration from the provided connection settings.</summary>
|
||||
/// <param name="settings">The connection settings to use.</param>
|
||||
/// <param name="ct">Token to cancel the operation.</param>
|
||||
public async Task<ApplicationConfiguration> CreateAsync(ConnectionSettings settings, CancellationToken ct)
|
||||
{
|
||||
var storePath = settings.CertificateStorePath;
|
||||
// Resolve the canonical PKI path lazily on first use so constructing a
|
||||
// ConnectionSettings instance — including the throwaway copies the client
|
||||
// service builds per failover attempt — does not touch the filesystem.
|
||||
// Callers that supply an explicit path override the default.
|
||||
var storePath = string.IsNullOrWhiteSpace(settings.CertificateStorePath)
|
||||
? ClientStoragePaths.GetPkiPath()
|
||||
: settings.CertificateStorePath;
|
||||
|
||||
var config = new ApplicationConfiguration
|
||||
{
|
||||
|
||||
@@ -11,6 +11,10 @@ internal sealed class DefaultEndpointDiscovery : IEndpointDiscovery
|
||||
{
|
||||
private static readonly ILogger Logger = Log.ForContext<DefaultEndpointDiscovery>();
|
||||
|
||||
/// <summary>Selects an OPC UA endpoint matching the requested security mode.</summary>
|
||||
/// <param name="config">The application configuration.</param>
|
||||
/// <param name="endpointUrl">The endpoint URL to query.</param>
|
||||
/// <param name="requestedMode">The requested message security mode.</param>
|
||||
public EndpointDescription SelectEndpoint(ApplicationConfiguration config, string endpointUrl,
|
||||
MessageSecurityMode requestedMode)
|
||||
{
|
||||
@@ -24,9 +28,47 @@ internal sealed class DefaultEndpointDiscovery : IEndpointDiscovery
|
||||
using var client = DiscoveryClient.Create(new Uri(endpointUrl));
|
||||
var allEndpoints = client.GetEndpoints(null);
|
||||
|
||||
return EndpointSelector.SelectBest(allEndpoints, endpointUrl, requestedMode);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Pure best-endpoint selection logic, extracted from <see cref="DefaultEndpointDiscovery"/>
|
||||
/// so it can be unit tested without standing up a real <see cref="DiscoveryClient"/>.
|
||||
/// </summary>
|
||||
internal static class EndpointSelector
|
||||
{
|
||||
private static readonly ILogger Logger = Log.ForContext(typeof(EndpointSelector));
|
||||
|
||||
/// <summary>
|
||||
/// Picks the best endpoint from the discovery response that matches the requested
|
||||
/// security mode, preferring <c>Basic256Sha256</c>, and rewrites the endpoint URL
|
||||
/// host to match the user-supplied URL when the discovery response advertises a
|
||||
/// different hostname.
|
||||
/// </summary>
|
||||
/// <param name="allEndpoints">Endpoints returned by the discovery query, in any order.</param>
|
||||
/// <param name="endpointUrl">The endpoint URL the operator supplied; supplies the hostname rewrite target.</param>
|
||||
/// <param name="requestedMode">The requested OPC UA message security mode.</param>
|
||||
/// <exception cref="InvalidOperationException">
|
||||
/// Thrown when no endpoint matches <paramref name="requestedMode"/>; the message lists the
|
||||
/// security mode + policy combinations the server returned so operators can diagnose mismatches.
|
||||
/// </exception>
|
||||
public static EndpointDescription SelectBest(
|
||||
IEnumerable<EndpointDescription> allEndpoints,
|
||||
string endpointUrl,
|
||||
MessageSecurityMode requestedMode)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(allEndpoints);
|
||||
if (string.IsNullOrWhiteSpace(endpointUrl))
|
||||
throw new ArgumentException("Endpoint URL must not be null or empty.", nameof(endpointUrl));
|
||||
|
||||
// Materialise once so we can both iterate and produce a diagnostic message
|
||||
// without re-running the underlying discovery enumeration.
|
||||
var endpoints = allEndpoints.ToList();
|
||||
|
||||
EndpointDescription? best = null;
|
||||
|
||||
foreach (var ep in allEndpoints)
|
||||
foreach (var ep in endpoints)
|
||||
{
|
||||
if (ep.SecurityMode != requestedMode)
|
||||
continue;
|
||||
@@ -37,18 +79,21 @@ internal sealed class DefaultEndpointDiscovery : IEndpointDiscovery
|
||||
continue;
|
||||
}
|
||||
|
||||
// Prefer Basic256Sha256 when multiple endpoints match the requested mode.
|
||||
if (ep.SecurityPolicyUri == SecurityPolicies.Basic256Sha256)
|
||||
best = ep;
|
||||
}
|
||||
|
||||
if (best == null)
|
||||
{
|
||||
var available = string.Join(", ", allEndpoints.Select(e => $"{e.SecurityMode}/{e.SecurityPolicyUri}"));
|
||||
var available = string.Join(", ", endpoints.Select(e => $"{e.SecurityMode}/{e.SecurityPolicyUri}"));
|
||||
throw new InvalidOperationException(
|
||||
$"No endpoint found with security mode '{requestedMode}'. Available endpoints: {available}");
|
||||
}
|
||||
|
||||
// Rewrite endpoint URL hostname to match user-supplied hostname
|
||||
// Rewrite endpoint URL hostname to match user-supplied hostname. Necessary
|
||||
// when the OPC UA server returns a discovery URL using a different hostname
|
||||
// (e.g. internal DNS name) than the one the operator routed to.
|
||||
var serverUri = new Uri(best.EndpointUrl);
|
||||
var requestedUri = new Uri(endpointUrl);
|
||||
if (serverUri.Host != requestedUri.Host)
|
||||
|
||||
@@ -73,6 +73,17 @@ internal sealed class DefaultSessionAdapter : ISessionAdapter
|
||||
|
||||
var writeCollection = new WriteValueCollection { writeValue };
|
||||
var response = await _session.WriteAsync(null, writeCollection, ct);
|
||||
// A malformed or service-level-faulted response can come back with an empty
|
||||
// Results collection alongside a service fault. Surface the service result
|
||||
// (or BadUnexpectedError) rather than letting Results[0] throw
|
||||
// IndexOutOfRangeException upstream.
|
||||
if (response.Results == null || response.Results.Count == 0)
|
||||
{
|
||||
var serviceResult = response.ResponseHeader?.ServiceResult.Code ?? StatusCodes.BadUnexpectedError;
|
||||
throw new ServiceResultException(serviceResult,
|
||||
$"Write response contained no results for node {nodeId}.");
|
||||
}
|
||||
|
||||
return response.Results[0];
|
||||
}
|
||||
|
||||
@@ -143,15 +154,18 @@ internal sealed class DefaultSessionAdapter : ISessionAdapter
|
||||
if (continuationPoint != null)
|
||||
nodesToRead[0].ContinuationPoint = continuationPoint;
|
||||
|
||||
_session.HistoryRead(
|
||||
// Use the async overload so this method is genuinely asynchronous,
|
||||
// honors the cancellation token, and does not block the caller's thread
|
||||
// (which would block the UI dispatcher for client.ui consumers).
|
||||
var response = await _session.HistoryReadAsync(
|
||||
null,
|
||||
new ExtensionObject(details),
|
||||
TimestampsToReturn.Source,
|
||||
continuationPoint != null,
|
||||
nodesToRead,
|
||||
out var results,
|
||||
out _);
|
||||
ct).ConfigureAwait(false);
|
||||
|
||||
var results = response.Results;
|
||||
if (results == null || results.Count == 0)
|
||||
break;
|
||||
|
||||
@@ -186,15 +200,17 @@ internal sealed class DefaultSessionAdapter : ISessionAdapter
|
||||
new HistoryReadValueId { NodeId = nodeId }
|
||||
};
|
||||
|
||||
_session.HistoryRead(
|
||||
// Use the async overload so the method honors the cancellation token and
|
||||
// does not block on a synchronous service round-trip.
|
||||
var response = await _session.HistoryReadAsync(
|
||||
null,
|
||||
new ExtensionObject(details),
|
||||
TimestampsToReturn.Source,
|
||||
false,
|
||||
nodesToRead,
|
||||
out var results,
|
||||
out _);
|
||||
ct).ConfigureAwait(false);
|
||||
|
||||
var results = response.Results;
|
||||
var allValues = new List<DataValue>();
|
||||
|
||||
if (results != null && results.Count > 0)
|
||||
@@ -229,7 +245,9 @@ internal sealed class DefaultSessionAdapter : ISessionAdapter
|
||||
{
|
||||
try
|
||||
{
|
||||
if (_session.Connected) _session.Close();
|
||||
// Use the async overload so the caller does not block on the close
|
||||
// service round-trip and the cancellation token is honored.
|
||||
if (_session.Connected) await _session.CloseAsync(ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -270,6 +288,15 @@ internal sealed class DefaultSessionAdapter : ISessionAdapter
|
||||
},
|
||||
ct);
|
||||
|
||||
// An empty Results collection paired with a service fault must surface as
|
||||
// a ServiceResultException, not an IndexOutOfRangeException from Results[0].
|
||||
if (result.Results == null || result.Results.Count == 0)
|
||||
{
|
||||
var serviceResult = result.ResponseHeader?.ServiceResult.Code ?? StatusCodes.BadUnexpectedError;
|
||||
throw new ServiceResultException(serviceResult,
|
||||
$"Call response contained no results for method {methodId} on {objectId}.");
|
||||
}
|
||||
|
||||
var callResult = result.Results[0];
|
||||
if (StatusCode.IsBad(callResult.StatusCode))
|
||||
throw new ServiceResultException(callResult.StatusCode);
|
||||
|
||||
@@ -11,6 +11,14 @@ internal sealed class DefaultSessionFactory : ISessionFactory
|
||||
{
|
||||
private static readonly ILogger Logger = Log.ForContext<DefaultSessionFactory>();
|
||||
|
||||
/// <summary>Creates a new OPC UA session.</summary>
|
||||
/// <param name="config">The OPC UA application configuration.</param>
|
||||
/// <param name="endpoint">The endpoint description to connect to.</param>
|
||||
/// <param name="sessionName">The name for the session.</param>
|
||||
/// <param name="sessionTimeoutMs">The session timeout in milliseconds.</param>
|
||||
/// <param name="identity">The user identity for the session.</param>
|
||||
/// <param name="ct">The cancellation token.</param>
|
||||
/// <returns>An adapter wrapping the created session.</returns>
|
||||
public async Task<ISessionAdapter> CreateSessionAsync(
|
||||
ApplicationConfiguration config,
|
||||
EndpointDescription endpoint,
|
||||
|
||||
+2
@@ -11,5 +11,7 @@ internal interface IApplicationConfigurationFactory
|
||||
/// <summary>
|
||||
/// Creates a validated ApplicationConfiguration for the given connection settings.
|
||||
/// </summary>
|
||||
/// <param name="settings">The connection settings to configure.</param>
|
||||
/// <param name="ct">Cancellation token for the operation.</param>
|
||||
Task<ApplicationConfiguration> CreateAsync(ConnectionSettings settings, CancellationToken ct = default);
|
||||
}
|
||||
@@ -11,6 +11,9 @@ internal interface IEndpointDiscovery
|
||||
/// Discovers endpoints at the given URL and returns the best match for the requested security mode.
|
||||
/// Also rewrites the endpoint URL hostname to match the requested URL when they differ.
|
||||
/// </summary>
|
||||
/// <param name="config">The OPC UA application configuration.</param>
|
||||
/// <param name="endpointUrl">The endpoint URL to discover.</param>
|
||||
/// <param name="requestedMode">The requested message security mode.</param>
|
||||
EndpointDescription SelectEndpoint(ApplicationConfiguration config, string endpointUrl,
|
||||
MessageSecurityMode requestedMode);
|
||||
}
|
||||
@@ -11,6 +11,8 @@ public static class AggregateTypeMapper
|
||||
/// <summary>
|
||||
/// Returns the OPC UA NodeId for the specified aggregate type.
|
||||
/// </summary>
|
||||
/// <param name="aggregate">The aggregate type to map to a NodeId.</param>
|
||||
/// <returns>The OPC UA NodeId for the aggregate function.</returns>
|
||||
public static NodeId ToNodeId(AggregateType aggregate)
|
||||
{
|
||||
return aggregate switch
|
||||
|
||||
@@ -8,9 +8,9 @@ namespace ZB.MOM.WW.OtOpcUa.Client.Shared.Helpers;
|
||||
/// </summary>
|
||||
public static class SecurityModeMapper
|
||||
{
|
||||
/// <summary>
|
||||
/// Converts a <see cref="SecurityMode" /> to an OPC UA <see cref="MessageSecurityMode" />.
|
||||
/// </summary>
|
||||
/// <summary>Converts a SecurityMode to an OPC UA MessageSecurityMode.</summary>
|
||||
/// <param name="mode">The security mode to convert.</param>
|
||||
/// <returns>The corresponding message security mode.</returns>
|
||||
public static MessageSecurityMode ToMessageSecurityMode(SecurityMode mode)
|
||||
{
|
||||
return mode switch
|
||||
|
||||
@@ -96,6 +96,11 @@ public interface IOpcUaClientService : IDisposable
|
||||
/// <param name="eventId">The event identifier returned by the OPC UA server for the alarm event.</param>
|
||||
/// <param name="comment">The operator acknowledgment comment to write with the method call.</param>
|
||||
/// <param name="ct">The cancellation token that aborts the acknowledgment request.</param>
|
||||
/// <returns>
|
||||
/// <see cref="StatusCodes.Good"/> on success, or the server's bad <see cref="StatusCode"/>
|
||||
/// (from the underlying <see cref="ServiceResultException"/>) when the acknowledge call
|
||||
/// returns a bad result. Other transport-level failures still surface as exceptions.
|
||||
/// </returns>
|
||||
Task<StatusCode> AcknowledgeAlarmAsync(string conditionNodeId, byte[] eventId, string comment, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -5,5 +5,7 @@ namespace ZB.MOM.WW.OtOpcUa.Client.Shared;
|
||||
/// </summary>
|
||||
public interface IOpcUaClientServiceFactory
|
||||
{
|
||||
/// <summary>Creates a new OPC UA client service instance.</summary>
|
||||
/// <returns>A new <see cref="IOpcUaClientService"/> instance.</returns>
|
||||
IOpcUaClientService Create();
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user