Compare commits
152 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f02071c9a2 | |||
| 993e012e55 | |||
| 961e09430a | |||
| a1a7646b33 | |||
| e4d0d82f7f | |||
| 2915755a7c | |||
| a5c6ce279e | |||
| 59b3d9f295 | |||
| 89095c15e3 | |||
| bdae749b2b | |||
| e8c4f18607 | |||
| cb936db7d6 | |||
| a5412c16a3 | |||
| dce2528c68 | |||
| 83eda9e826 | |||
| 70ffd2849d | |||
| 898a47746d | |||
| 25ce111981 | |||
| 7209bc99e2 | |||
| 2c49f18442 | |||
| 05a0596fb1 | |||
| 219d10a22d | |||
| 607dc51dec | |||
| 9d86287d08 | |||
| 2697af31d1 | |||
| 52997ee164 | |||
| 21eac21409 | |||
| 8b08566f41 | |||
| 50787823d3 | |||
| 7e22e2250c | |||
| d21f6947e1 | |||
| 7fa863f6da | |||
| f427dc4f26 | |||
| 3e3f7588bd | |||
| c02f016f1d | |||
| a1325299ce | |||
| 14fb2b05ed | |||
| da141497f8 | |||
| 9892ceae9a | |||
| 59858129cb | |||
| e248e037e7 | |||
| ae980aef5d | |||
| 2662ac08e4 | |||
| 45740578c9 | |||
| 5ae67a48ba | |||
| d055cb059e | |||
| 74161f9460 | |||
| 396052a126 | |||
| fd0cc4dfdb | |||
| 850d6774ea | |||
| 5c754ecffd | |||
| 68c6f36cfe | |||
| 36c4751571 | |||
| 229282ad8b | |||
| b0a2bb037d | |||
| ba6e5dd7f9 | |||
| 686138123f | |||
| cd5540cb1a | |||
| 4e6ef648d1 | |||
| f18c285cca | |||
| 7a6b016d9e | |||
| 8f32b89fb9 | |||
| 337a691629 | |||
| b06e3ae740 | |||
| f57f61deac | |||
| 8e5c8e29f7 | |||
| 253fb60459 | |||
| 8ac71db464 | |||
| 7e3b56c27d | |||
| e40615dad5 | |||
| 1689901c0e | |||
| 3c3fef911c | |||
| a8becc9c46 | |||
| 5cfbe8b5dd | |||
| 62e3cd6599 | |||
| d6fac2d81d | |||
| bb353c4d43 | |||
| 45a8c79ffe | |||
| b266f63cd7 | |||
| dfc143cdeb | |||
| 463512d1d8 | |||
| 09d6676e1f | |||
| 698709a578 | |||
| 76310b8829 | |||
| 2b75ce3876 | |||
| 8b4de8080b | |||
| fa1d685ccd | |||
| e2b357f89a | |||
| eb4280b7eb | |||
| 8a1f97b27f | |||
| f167808a2c | |||
| b83f099394 | |||
| f022499e7f | |||
| 26d8f2f620 | |||
| 1a067e609c | |||
| 5e31449529 | |||
| b7c117ab31 | |||
| 2877a883cd | |||
| 2e4f1399bb | |||
| e31547d00e | |||
| 28639cb14d | |||
| e115f13104 | |||
| 95ef533822 | |||
| 39729bfe21 | |||
| 64c627f8d6 | |||
| ed130135ca | |||
| ea6f972e96 | |||
| 52bf4b3371 | |||
| dd122c4ca9 | |||
| f193872891 | |||
| bad2aef137 | |||
| 6b37f997ad | |||
| 62e12dab95 | |||
| ef683f5073 | |||
| 9f61cd5989 | |||
| 9582e448d5 | |||
| 1955bc5f4d | |||
| 23f669c376 | |||
| 14acab5a58 | |||
| 32574b3e4e | |||
| fc22d4f7b6 | |||
| 973a3d1b9a | |||
| 38ea0c5086 | |||
| e38f22e3c2 | |||
| 8be84ba27b | |||
| 207fc6aba9 | |||
| 93316e3431 | |||
| 567b8cac1d | |||
| f35925b57e | |||
| e0b6d5680b | |||
| c217c49f69 | |||
| dfb06368cd | |||
| f184f8ed1b | |||
| 3d0f4dc168 | |||
| fdb4ac7051 | |||
| 136234e7f2 | |||
| 5d3a5a40d7 | |||
| fee4a8c008 | |||
| c168c1c9c6 | |||
| 605dbf3dcc | |||
| e00f46d723 | |||
| 3c915e652e | |||
| 1ddf8bb50e | |||
| 13d3aeab09 | |||
| 4bb4ad8acb | |||
| 990ce343fe | |||
| 8e2c4f2835 | |||
| 30a2104fa5 | |||
| 2b811477d1 | |||
| fac32ad69b | |||
| ef4a70751c | |||
| 866dc03fac |
@@ -0,0 +1,76 @@
|
||||
# CI for the v2 branch — runs on every push + PR to the v2-akka-fuse / master
|
||||
# branches. Layered into three jobs:
|
||||
# build dotnet restore + build (fast feedback on compile errors)
|
||||
# unit-tests every v2 unit-test project
|
||||
# integration 2-node Host.IntegrationTests harness
|
||||
#
|
||||
# Skips E2E (Category=E2E) — that runs nightly via v2-e2e.yml against the full
|
||||
# four-node docker-dev stack.
|
||||
#
|
||||
# Compatible with both GitHub Actions and Gitea Actions (act_runner). The .NET 10
|
||||
# SDK is pinned via global.json at the repo root; if no global.json exists, the
|
||||
# setup-dotnet step falls back to dotnet-version below.
|
||||
|
||||
name: v2-ci
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [v2-akka-fuse, master]
|
||||
pull_request:
|
||||
branches: [v2-akka-fuse, master]
|
||||
workflow_dispatch: {}
|
||||
|
||||
env:
|
||||
DOTNET_NOLOGO: "1"
|
||||
DOTNET_CLI_TELEMETRY_OPTOUT: "1"
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet restore
|
||||
run: dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
- name: dotnet build
|
||||
run: dotnet build ZB.MOM.WW.OtOpcUa.slnx --no-restore --configuration Release
|
||||
|
||||
unit-tests:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
|
||||
integration:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
@@ -0,0 +1,57 @@
|
||||
# Nightly E2E job. Runs against the docker-dev four-node fleet (admin-a +
|
||||
# admin-b + driver-a + driver-b + SQL + LDAP + Traefik). Trigger:
|
||||
# - cron at 03:00 UTC daily
|
||||
# - workflow_dispatch from the Actions UI for on-demand runs
|
||||
#
|
||||
# The E2E test project (tests/Server/ZB.MOM.WW.OtOpcUa.E2ETests) does not yet
|
||||
# exist — it lands when the F-series follow-ups F10/F11/F12 wire enough of the
|
||||
# SDK/historian/probe so an end-to-end driver round-trip is meaningful. Until
|
||||
# then this workflow is a green no-op (the `--filter Category=E2E` matches
|
||||
# zero tests, and `dotnet test` returns 0).
|
||||
|
||||
name: v2-e2e
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 3 * * *"
|
||||
workflow_dispatch: {}
|
||||
|
||||
env:
|
||||
DOTNET_NOLOGO: "1"
|
||||
DOTNET_CLI_TELEMETRY_OPTOUT: "1"
|
||||
|
||||
jobs:
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: dotnet restore
|
||||
run: dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
|
||||
- name: Build docker-dev fleet
|
||||
run: docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
- name: Wait for cluster
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost/health/active >/dev/null; then
|
||||
echo "Admin leader healthy after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
echo "Timed out waiting for /health/active"
|
||||
docker compose -f docker-dev/docker-compose.yml logs --tail=200
|
||||
exit 1
|
||||
|
||||
- name: dotnet test (E2E only)
|
||||
run: dotnet test --configuration Release --filter "Category=E2E"
|
||||
|
||||
- name: Tear down
|
||||
if: always()
|
||||
run: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
@@ -0,0 +1,18 @@
|
||||
<Project>
|
||||
<!--
|
||||
Defaults inherited by every csproj. Individual projects may override.
|
||||
|
||||
Deviation from the original v2 plan: TreatWarningsAsErrors is NOT set globally because the
|
||||
pre-v2 test projects (e.g. Admin.Tests) carry 240+ xUnit1051 analyzer warnings that would
|
||||
fail the build. New v2 projects (Commons, Cluster, ControlPlane, Runtime, OpcUaServer, AdminUI,
|
||||
Host, Security) MUST opt in to <TreatWarningsAsErrors>true</TreatWarningsAsErrors> in their
|
||||
own csproj. Once the legacy Admin/Server projects are deleted (Phase 10, Task 56), this can
|
||||
be promoted back to a global default.
|
||||
-->
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<LangVersion>latest</LangVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,103 @@
|
||||
<Project>
|
||||
|
||||
<PropertyGroup>
|
||||
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageVersion Include="Akka" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster.Tools" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Remote" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Remote.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams.TestKit" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.TestKit.Xunit2" Version="1.5.62" />
|
||||
<PackageVersion Include="Avalonia" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Controls.DataGrid" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Desktop" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Diagnostics" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Fonts.Inter" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Headless" Version="11.2.7" />
|
||||
<PackageVersion Include="Avalonia.Svg.Skia" Version="11.2.0.2" />
|
||||
<PackageVersion Include="Avalonia.Themes.Fluent" Version="11.2.7" />
|
||||
<PackageVersion Include="Beckhoff.TwinCAT.Ads" Version="7.0.172" />
|
||||
<PackageVersion Include="bunit" Version="2.0.33-preview" />
|
||||
<PackageVersion Include="CliFx" Version="2.3.6" />
|
||||
<PackageVersion Include="CommunityToolkit.Mvvm" Version="8.4.0" />
|
||||
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
|
||||
<PackageVersion Include="FluentAssertions" Version="8.3.0" />
|
||||
<PackageVersion Include="Google.Protobuf" Version="3.34.1" />
|
||||
<PackageVersion Include="Grpc.Core.Api" Version="2.76.0" />
|
||||
<PackageVersion Include="Grpc.Net.Client" Version="2.76.0" />
|
||||
<PackageVersion Include="libplctag" Version="1.5.2" />
|
||||
<PackageVersion Include="LiteDB" Version="5.0.21" />
|
||||
<PackageVersion Include="MessagePack" Version="2.5.187" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.Authentication.JwtBearer" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.Authorization" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.DataProtection" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.DataProtection.EntityFrameworkCore" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.Mvc.Testing" Version="10.0.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.SignalR.Client" Version="10.0.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.SignalR.Core" Version="1.2.0" />
|
||||
<PackageVersion Include="Microsoft.AspNetCore.TestHost" Version="10.0.7" />
|
||||
<!--
|
||||
Roslyn analyzer packages pin to the same major version as the SDK's compiler.
|
||||
.NET SDK 10.0.105 ships compiler 5.0.0.0. Microsoft.CodeAnalysis.CSharp 5.3.x emits
|
||||
analyzer DLLs that reference compiler 5.3.0.0 and fail with CS9057 on the local SDK.
|
||||
Pin to 5.0.0 (matches the compiler the SDK ships) until the SDK rolls to 10.0.110+.
|
||||
-->
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp" Version="5.0.0" />
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp.Scripting" Version="4.12.0" />
|
||||
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="5.0.0" />
|
||||
<PackageVersion Include="Microsoft.Data.SqlClient" Version="6.1.1" />
|
||||
<PackageVersion Include="Microsoft.Data.Sqlite" Version="9.0.0" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.InMemory" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Json" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.DependencyInjection" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Http" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Logging" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Options" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.IdentityModel.Tokens" Version="8.11.0" />
|
||||
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageVersion Include="Microsoft.Playwright" Version="1.51.0" />
|
||||
<PackageVersion Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Client" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.378.106" />
|
||||
<PackageVersion Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126" />
|
||||
<PackageVersion Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" Version="1.15.3-beta.1" />
|
||||
<PackageVersion Include="OpenTelemetry.Extensions.Hosting" Version="1.15.3" />
|
||||
<PackageVersion Include="Polly.Core" Version="8.6.6" />
|
||||
<PackageVersion Include="S7netplus" Version="0.20.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.0" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Formatting.Compact" Version="3.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.Console" Version="6.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.File" Version="7.0.0" />
|
||||
<PackageVersion Include="Shouldly" Version="4.3.0" />
|
||||
<PackageVersion Include="System.CommandLine" Version="2.0.5" />
|
||||
<PackageVersion Include="System.Data.SqlClient" Version="4.9.0" />
|
||||
<PackageVersion Include="System.IdentityModel.Tokens.Jwt" Version="8.11.0" />
|
||||
<PackageVersion Include="System.IO.Pipes.AccessControl" Version="5.0.0" />
|
||||
<PackageVersion Include="System.Memory" Version="4.5.5" />
|
||||
<PackageVersion Include="System.Threading.Tasks.Extensions" Version="4.5.4" />
|
||||
<PackageVersion Include="xunit" Version="2.9.2" />
|
||||
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.2" />
|
||||
<PackageVersion Include="xunit.v3" Version="1.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
+15
-5
@@ -2,6 +2,8 @@
|
||||
<Folder Name="/src/" />
|
||||
<Folder Name="/src/Core/">
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Cluster/ZB.MOM.WW.OtOpcUa.Cluster.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Commons/ZB.MOM.WW.OtOpcUa.Commons.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Configuration/ZB.MOM.WW.OtOpcUa.Configuration.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core/ZB.MOM.WW.OtOpcUa.Core.csproj" />
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting/ZB.MOM.WW.OtOpcUa.Core.Scripting.csproj" />
|
||||
@@ -10,8 +12,12 @@
|
||||
<Project Path="src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/src/Server/">
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Admin/ZB.MOM.WW.OtOpcUa.Admin.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/ZB.MOM.WW.OtOpcUa.AdminUI.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/ZB.MOM.WW.OtOpcUa.ControlPlane.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Host/ZB.MOM.WW.OtOpcUa.Host.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/ZB.MOM.WW.OtOpcUa.OpcUaServer.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ZB.MOM.WW.OtOpcUa.Runtime.csproj" />
|
||||
<Project Path="src/Server/ZB.MOM.WW.OtOpcUa.Security/ZB.MOM.WW.OtOpcUa.Security.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/src/Drivers/">
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj" />
|
||||
@@ -46,6 +52,7 @@
|
||||
<Folder Name="/tests/" />
|
||||
<Folder Name="/tests/Core/">
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests/ZB.MOM.WW.OtOpcUa.Cluster.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Configuration.Tests/ZB.MOM.WW.OtOpcUa.Configuration.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/ZB.MOM.WW.OtOpcUa.Core.Tests.csproj" />
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests.csproj" />
|
||||
@@ -54,9 +61,12 @@
|
||||
<Project Path="tests/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.Tests/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.Tests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Server/">
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/ZB.MOM.WW.OtOpcUa.Server.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/ZB.MOM.WW.OtOpcUa.Admin.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/ZB.MOM.WW.OtOpcUa.Admin.E2ETests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/ZB.MOM.WW.OtOpcUa.Runtime.Tests.csproj" />
|
||||
<Project Path="tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests/ZB.MOM.WW.OtOpcUa.Security.Tests.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/Drivers/">
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.csproj" />
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
# Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml
|
||||
# to spin four host containers (admin-a, admin-b, driver-a, driver-b) from a single image —
|
||||
# Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them.
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
WORKDIR /src
|
||||
COPY . .
|
||||
RUN dotnet restore ZB.MOM.WW.OtOpcUa.slnx
|
||||
RUN dotnet publish src/Server/ZB.MOM.WW.OtOpcUa.Host/ZB.MOM.WW.OtOpcUa.Host.csproj \
|
||||
-c Release -o /app --no-restore
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
|
||||
WORKDIR /app
|
||||
COPY --from=build /app ./
|
||||
|
||||
EXPOSE 9000
|
||||
EXPOSE 4053
|
||||
EXPOSE 4840
|
||||
|
||||
ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"]
|
||||
@@ -0,0 +1,112 @@
|
||||
# docker-dev
|
||||
|
||||
Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **three isolated Akka clusters** + SQL Server + OpenLDAP + Traefik on the same Compose network. All three clusters share the single `OtOpcUa` ConfigDb — multi-tenancy is enforced by per-row `ServerCluster.ClusterId` scoping. Akka.Cluster gossip stays isolated between meshes because their seed-node lists are disjoint, even though they share the same system name `otopcua`.
|
||||
|
||||
## Stack
|
||||
|
||||
### Shared infrastructure
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all three clusters | host `14330` → container `1433` |
|
||||
| `ldap` | OpenLDAP with dev users `alice` / `bob` | host `3893` → container `1389` |
|
||||
| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8080` |
|
||||
|
||||
### Main cluster — split admin/driver roles
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `admin-a` | `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` |
|
||||
| `admin-b` | `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` |
|
||||
| `driver-a` | `OTOPCUA_ROLES=driver` | host `4840` → container `4840` |
|
||||
| `driver-b` | `OTOPCUA_ROLES=driver` | host `4841` → container `4840` |
|
||||
|
||||
### Site A cluster — 2-node fused admin+driver
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `site-a-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=admin,driver`, joins site-a-1 | host `4843` → container `4840` |
|
||||
|
||||
### Site B cluster — 2-node fused admin+driver
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `site-b-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=admin,driver`, joins site-b-1 | host `4845` → container `4840` |
|
||||
|
||||
All containers bind Akka remoting to port `4053` inside their own network namespace; the `PublicHostname` of each matches its Compose service name. Akka mesh isolation is enforced purely by disjoint seed lists. Configuration-side isolation is enforced by `ServerCluster.ClusterId` — see "Multi-tenancy" below.
|
||||
|
||||
## Multi-tenancy
|
||||
|
||||
All eight host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three Akka meshes: each Akka cluster maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
|
||||
A one-shot `cluster-seed` Compose service (image `mcr.microsoft.com/mssql-tools`) waits for SQL + the EF auto-migration to complete and then INSERTs the rows below. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
|
||||
| Akka mesh | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
|---|---|---|
|
||||
| Main | `MAIN` | `driver-a`, `driver-b` (OPC UA publishers) |
|
||||
| Site A | `SITE-A` | `site-a-1`, `site-a-2` |
|
||||
| Site B | `SITE-B` | `site-b-1`, `site-b-2` |
|
||||
|
||||
`ClusterNode` is the table for **OPC UA-publishing nodes** (not every Akka cluster member), which is why the main cluster's `admin-a` / `admin-b` don't get rows — they're control-plane-only.
|
||||
|
||||
Each `ClusterNode.NodeId` matches the node's `Cluster__PublicHostname` env value (Compose service name) — that's the lookup the runtime uses to resolve its own membership. `ApplicationUri` follows the `urn:OtOpcUa:<NodeId>` convention.
|
||||
|
||||
The SQL lives at `seed/seed-clusters.sql`; the wait-and-apply wrapper lives at `seed/entrypoint.sh`. To re-seed manually:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml run --rm cluster-seed
|
||||
```
|
||||
|
||||
## Bring up
|
||||
|
||||
```bash
|
||||
# from the repo root
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
# wait ~20 seconds for SQL to come up + all three clusters to form
|
||||
|
||||
open http://localhost # main cluster admin UI
|
||||
open http://site-a.localhost # site A admin UI
|
||||
open http://site-b.localhost # site B admin UI
|
||||
open http://localhost:8080 # Traefik dashboard
|
||||
```
|
||||
|
||||
On macOS, `*.localhost` resolves to `127.0.0.1` automatically. On Linux add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't.
|
||||
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache.
|
||||
|
||||
## Auth (dev only)
|
||||
|
||||
Use one of the LDAP dev users from `LDAP_USERS` in `docker-compose.yml`:
|
||||
|
||||
| Username | Password |
|
||||
|---|---|
|
||||
| `alice` | `alice123` |
|
||||
| `bob` | `bob123` |
|
||||
|
||||
The compose mounts everyone into `ou=FleetAdmin` so the dev role mapping resolves to `FleetAdmin`.
|
||||
|
||||
## Tear down
|
||||
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across restarts.
|
||||
|
||||
## Failover smoke
|
||||
|
||||
1. Watch the Traefik dashboard at `http://localhost:8080`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it.
|
||||
|
||||
## Notes
|
||||
|
||||
- This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings.
|
||||
- The OPC UA driver endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (driver-a), `opc.tcp://localhost:4841` (driver-b)
|
||||
- Site A: `opc.tcp://localhost:4842` (site-a-1), `opc.tcp://localhost:4843` (site-a-2)
|
||||
- Site B: `opc.tcp://localhost:4844` (site-b-1), `opc.tcp://localhost:4845` (site-b-2)
|
||||
- Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success.
|
||||
@@ -0,0 +1,268 @@
|
||||
# docker-dev/ — Mac-friendly multi-cluster fleet for v2 development + manual UI exercise.
|
||||
#
|
||||
# Stack (3 separate Akka clusters — all share the single `OtOpcUa` ConfigDb):
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb that all three clusters use
|
||||
# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in
|
||||
#
|
||||
# Main cluster (existing — split-role admin / driver pair on a single Akka mesh):
|
||||
# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (seed)
|
||||
# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a)
|
||||
# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
#
|
||||
# Site A cluster (2-node fused admin+driver):
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=admin,driver, seed = site-a-1
|
||||
#
|
||||
# Site B cluster (2-node fused admin+driver):
|
||||
# site-b-1, site-b-2 OTOPCUA_ROLES=admin,driver, seed = site-b-1
|
||||
#
|
||||
# traefik PathPrefix → main cluster admin-a/admin-b; Host(`site-a.localhost`) →
|
||||
# site-a-*; Host(`site-b.localhost`) → site-b-*. Add the two site hosts to
|
||||
# your /etc/hosts (or rely on macOS `.localhost` auto-resolution).
|
||||
#
|
||||
# Multi-tenancy: ConfigDb is one schema with a `ServerCluster` table; each Akka cluster
|
||||
# corresponds to a row in it (ClusterId = "MAIN" / "SITE-A" / "SITE-B"), and each node's
|
||||
# `ClusterNode.NodeId` points back at the row that owns it. After first boot, sign in to
|
||||
# any cluster's Admin UI and create the matching ServerCluster + ClusterNode rows via
|
||||
# /clusters and /hosts so the runtime knows what configuration scope applies.
|
||||
#
|
||||
# Akka mesh isolation: same system name "otopcua" + same remoting port 4053 inside each
|
||||
# container's own network namespace, but with disjoint seed-node lists — gossip never
|
||||
# crosses between the three meshes.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
# open http://localhost # main cluster Blazor admin UI
|
||||
# open http://site-a.localhost # site A admin UI
|
||||
# open http://site-b.localhost # site B admin UI
|
||||
# open http://localhost:8080 # Traefik dashboard
|
||||
#
|
||||
# Tear-down: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
|
||||
name: otopcua-dev
|
||||
|
||||
services:
|
||||
|
||||
sql:
|
||||
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||
environment:
|
||||
ACCEPT_EULA: "Y"
|
||||
SA_PASSWORD: "OtOpcUa!Dev123"
|
||||
MSSQL_PID: Developer
|
||||
ports:
|
||||
- "14330:1433"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P 'OtOpcUa!Dev123' -No -Q 'SELECT 1' || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Waits for SQL + the host containers' EF auto-migration, then INSERTs the
|
||||
# three ServerCluster rows and the six ClusterNode rows that scope each Akka
|
||||
# mesh inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
depends_on:
|
||||
sql:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./seed:/seed:ro
|
||||
entrypoint: ["/bin/bash", "/seed/entrypoint.sh"]
|
||||
restart: "no"
|
||||
|
||||
ldap:
|
||||
image: bitnami/openldap:2.6
|
||||
environment:
|
||||
LDAP_ROOT: "dc=lmxopcua,dc=local"
|
||||
LDAP_ADMIN_USERNAME: "admin"
|
||||
LDAP_ADMIN_PASSWORD: "ldapadmin"
|
||||
LDAP_USERS: "alice,bob"
|
||||
LDAP_PASSWORDS: "alice123,bob123"
|
||||
LDAP_USER_DC: "ou=FleetAdmin"
|
||||
ports:
|
||||
- "3893:1389"
|
||||
|
||||
admin-a: &otopcua-host
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
image: otopcua-host:dev
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
|
||||
admin-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
|
||||
driver-a:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
driver-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# Shares the OtOpcUa ConfigDb with the main + site-b clusters; multi-tenancy is
|
||||
# enforced by ServerCluster.ClusterId rows (configure via /clusters after boot).
|
||||
# Akka isolation comes from the disjoint seed list (seed = site-a-1).
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
ports:
|
||||
- "4842:4840"
|
||||
|
||||
site-a-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-a-1: { condition: service_started }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
ports:
|
||||
- "4843:4840"
|
||||
|
||||
# ── Site B cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
|
||||
site-b-1:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
ports:
|
||||
- "4844:4840"
|
||||
|
||||
site-b-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-b-1: { condition: service_started }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Authentication__Ldap__Server: "ldap"
|
||||
Authentication__Ldap__Port: "1389"
|
||||
Authentication__Ldap__AllowInsecureLdap: "true"
|
||||
ports:
|
||||
- "4845:4840"
|
||||
|
||||
traefik:
|
||||
image: traefik:v3.1
|
||||
command:
|
||||
- --entrypoints.web.address=:80
|
||||
- --providers.file.filename=/etc/traefik/dynamic.yml
|
||||
- --providers.file.watch=true
|
||||
- --api.insecure=true
|
||||
ports:
|
||||
- "80:80"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro
|
||||
depends_on:
|
||||
- admin-a
|
||||
- admin-b
|
||||
- site-a-1
|
||||
- site-a-2
|
||||
- site-b-1
|
||||
- site-b-2
|
||||
Executable
+35
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
|
||||
# docker-dev cluster-seed entrypoint. Waits for the host containers to finish
|
||||
# their EF Core auto-migration (which creates the ServerCluster table), then
|
||||
# applies the idempotent seed script.
|
||||
#
|
||||
# Image: mcr.microsoft.com/mssql-tools (Debian + sqlcmd at /opt/mssql-tools18/bin).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SQLCMD="/opt/mssql-tools18/bin/sqlcmd"
|
||||
SERVER="${SQL_HOST:-sql},1433"
|
||||
USER="${SQL_USER:-sa}"
|
||||
PASS="${SQL_PASSWORD:-OtOpcUa!Dev123}"
|
||||
DB="${SQL_DATABASE:-OtOpcUa}"
|
||||
|
||||
run_sql() {
|
||||
"$SQLCMD" -S "$SERVER" -U "$USER" -P "$PASS" -d "$DB" -No -b -h -1 "$@"
|
||||
}
|
||||
|
||||
echo "[cluster-seed] waiting for SQL Server to accept connections..."
|
||||
until run_sql -Q "SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 2
|
||||
done
|
||||
echo "[cluster-seed] SQL Server up."
|
||||
|
||||
echo "[cluster-seed] waiting for $DB.ServerCluster (host containers must finish EF migration)..."
|
||||
until run_sql -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 3
|
||||
done
|
||||
echo "[cluster-seed] schema ready."
|
||||
|
||||
echo "[cluster-seed] applying seed-clusters.sql..."
|
||||
run_sql -i /seed/seed-clusters.sql
|
||||
|
||||
echo "[cluster-seed] done."
|
||||
@@ -0,0 +1,106 @@
|
||||
-- docker-dev cluster seed. Idempotent — safe to re-run on every `docker compose up`.
|
||||
--
|
||||
-- Populates:
|
||||
-- ServerCluster MAIN, SITE-A, SITE-B
|
||||
-- ClusterNode driver-a, driver-b → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
--
|
||||
-- ServerCluster.NodeCount + RedundancyMode are coupled by CHECK constraint:
|
||||
-- NodeCount=1 ⇒ RedundancyMode='None'
|
||||
-- NodeCount=2 ⇒ RedundancyMode∈('Warm','Hot')
|
||||
--
|
||||
-- Each ClusterNode.ApplicationUri MUST be globally unique (UX_ClusterNode_ApplicationUri).
|
||||
-- Convention: urn:OtOpcUa:<NodeId>.
|
||||
--
|
||||
-- Host = Compose service name (resolves inside the otopcua-dev network).
|
||||
-- OpcUaPort stays at the container-internal 4840; the host-side port mapping is in
|
||||
-- docker-compose.yml ports: blocks and is irrelevant to ClusterNode rows.
|
||||
|
||||
SET NOCOUNT ON;
|
||||
SET XACT_ABORT ON;
|
||||
|
||||
BEGIN TRANSACTION;
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ServerCluster
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'MAIN')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('MAIN', 'Main cluster', 'zb', 'docker-dev',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — admin-a/admin-b control-plane, driver-a/driver-b OPC UA publishers.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('SITE-A', 'Site A', 'zb', 'site-a',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
INSERT INTO dbo.ServerCluster
|
||||
(ClusterId, Name, Enterprise, Site, NodeCount, RedundancyMode, Enabled, Notes, CreatedBy)
|
||||
VALUES
|
||||
('SITE-B', 'Site B', 'zb', 'site-b',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — main cluster OPC UA publishers
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-a', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-b', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site A
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-1')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-a-1', 'SITE-A', 'site-a-1', 4840, 8081, 'urn:OtOpcUa:site-a-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-2')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-a-2', 'SITE-A', 'site-a-2', 4840, 8081, 'urn:OtOpcUa:site-a-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site B
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-1')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-b-1', 'SITE-B', 'site-b-1', 4840, 8081, 'urn:OtOpcUa:site-b-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-2')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('site-b-2', 'SITE-B', 'site-b-2', 4840, 8081, 'urn:OtOpcUa:site-b-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
COMMIT TRANSACTION;
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Summary (logged by sqlcmd output)
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
SELECT ClusterId, Name, NodeCount, RedundancyMode FROM dbo.ServerCluster ORDER BY ClusterId;
|
||||
SELECT NodeId, ClusterId, Host, OpcUaPort, ApplicationUri, ServiceLevelBase
|
||||
FROM dbo.ClusterNode ORDER BY ClusterId, NodeId;
|
||||
@@ -0,0 +1,57 @@
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes three
|
||||
# Akka clusters that share the Compose network:
|
||||
#
|
||||
# - Main cluster (default): PathPrefix(`/`) → admin-a / admin-b.
|
||||
# - Site A cluster: Host(`site-a.localhost`) → site-a-1 / site-a-2.
|
||||
# - Site B cluster: Host(`site-b.localhost`) → site-b-1 / site-b-2.
|
||||
#
|
||||
# Host-header rules are more specific than PathPrefix, so they win over the
|
||||
# default router for the site hostnames automatically — no priority field needed.
|
||||
|
||||
http:
|
||||
routers:
|
||||
otopcua-admin:
|
||||
entryPoints: ["web"]
|
||||
rule: "PathPrefix(`/`)"
|
||||
service: otopcua-admin
|
||||
|
||||
otopcua-site-a:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-a.localhost`)"
|
||||
service: otopcua-site-a
|
||||
|
||||
otopcua-site-b:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-b.localhost`)"
|
||||
service: otopcua-site-b
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-a:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://site-a-1:9000"
|
||||
- url: "http://site-a-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-b:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://site-b-1:9000"
|
||||
- url: "http://site-b-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
@@ -1,6 +1,6 @@
|
||||
# Address Space
|
||||
|
||||
Each driver's browsable subtree is built by streaming nodes from the driver's `ITagDiscovery.DiscoverAsync` implementation into an `IAddressSpaceBuilder`. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) owns the shared orchestration; `DriverNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`) implements `IAddressSpaceBuilder` against the OPC Foundation stack's `CustomNodeManager2`. The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
Each driver's browsable subtree is built by streaming nodes from the driver's `ITagDiscovery.DiscoverAsync` implementation into an `IAddressSpaceBuilder`. `GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) owns the shared orchestration; in v2 the SDK-driven materialization is handled by `OtOpcUaNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`) fed via `SdkAddressSpaceSink` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs`). The same code path serves Galaxy object hierarchies, Modbus PLC registers, AB CIP tags, TwinCAT symbols, FOCAS CNC parameters, and OPC UA Client aggregations — Galaxy is one driver of seven, not the driver.
|
||||
|
||||
## Driver root folder
|
||||
|
||||
@@ -66,7 +66,7 @@ Drivers that implement `IRediscoverable` fire `OnRediscoveryNeeded` when their b
|
||||
## Key source files
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — orchestration + `CapturingBuilder`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — OPC UA materialization (`IAddressSpaceBuilder` impl + `NestedBuilder`)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs`, `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — OPC UA materialization (write-only sink fed by the actor system)
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IAddressSpaceBuilder.cs` — builder contract
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs` — driver discovery capability
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverAttributeInfo.cs` — per-attribute descriptor
|
||||
|
||||
@@ -15,9 +15,10 @@ historical reference.
|
||||
| **Galaxy sub-attribute fallback** | `IWritable` writes to `$Alarm*` sub-attributes | gateway data subscription → driver `OnDataChange` → `DriverNodeManager` ConditionSink → `AlarmConditionService` |
|
||||
| **Scripted alarms** | `Phase7EngineComposer` | server-side script evaluator → `Phase7EngineComposer.RouteToHistorianAsync` + `AlarmConditionService` |
|
||||
|
||||
All three converge on `AlarmConditionService` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Alarms/AlarmConditionService.cs`),
|
||||
which owns the OPC UA Part 9 state machine and dispatches transitions
|
||||
to the OPC UA condition node managers. Driver-native transitions take
|
||||
All three converge on the alarm-state actor — in v2 the OPC UA Part 9 state
|
||||
machine lives inside `ScriptedAlarmActor`
|
||||
(`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`),
|
||||
which dispatches transitions to the OPC UA condition node managers. Driver-native transitions take
|
||||
precedence over sub-attribute synthesis when both arrive for the same
|
||||
condition — the dedup logic prefers the richer driver-native record
|
||||
because it carries the full operator + raise-time + category metadata
|
||||
|
||||
@@ -28,7 +28,7 @@ Static drivers (Modbus, S7, AB CIP, AB Legacy, FOCAS) do not implement `IRedisco
|
||||
|
||||
Tag-set changes authored in the Admin UI (UNS edits, CSV imports, driver-config edits) accumulate in a draft generation and commit via `sp_PublishGeneration`. The delta between the currently-published generation and the proposed next one is computed by `sp_ComputeGenerationDiff`, which drives:
|
||||
|
||||
- The **DiffViewer** in Admin (`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Clusters/DiffViewer.razor`) so operators can preview what will change before clicking Publish.
|
||||
- The publish-preview surface in the Admin UI (`src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Deployments.razor`, backed by `AdminOperationsClient`) so operators can preview what will change before clicking Publish.
|
||||
- The 409-on-stale-draft flow (decision #161) — a UNS drag-reorder preview carries a `DraftRevisionToken` so Confirm returns `409 Conflict / refresh-required` if the draft advanced between preview and commit.
|
||||
|
||||
After publish, the server's generation applier invokes `IDriver.ReinitializeAsync(driverConfigJson, ct)` on every driver whose `DriverInstance.DriverConfig` row changed in the new generation. Reinitialize is the in-process recovery path for Tier A/B drivers; if it fails the driver is marked `DriverState.Faulted` and its nodes go Bad quality — but the server process stays running. See `docs/v2/driver-stability.md`.
|
||||
@@ -64,6 +64,7 @@ Subscriptions for unchanged references stay live across rebuilds — their ref-c
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IRediscoverable.cs` — backend-change capability
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — discovery orchestration
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriver.cs` — `ReinitializeAsync` contract
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/GenerationService.cs` — publish-flow driver
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Coordinators/ConfigPublishCoordinator.cs` — publish-flow driver
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs` — cluster singleton invoked by the Admin UI's `AdminOperationsClient`
|
||||
- `docs/v2/config-db-schema.md` — `sp_PublishGeneration` + `sp_ComputeGenerationDiff`
|
||||
- `docs/v2/admin-ui.md` — DiffViewer + draft-revision-token flow
|
||||
|
||||
+9
-8
@@ -1,13 +1,13 @@
|
||||
# OPC UA Server
|
||||
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs`) hosts the OPC UA stack and exposes one browsable subtree per registered driver. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
The OPC UA server component (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs`) hosts the OPC UA stack and exposes one browsable subtree per registered driver. The server itself is driver-agnostic — Galaxy/MXAccess, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client are all plugged in as `IDriver` implementations via the capability interfaces in `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
|
||||
## Composition
|
||||
|
||||
`OtOpcUaServer` subclasses the OPC Foundation `StandardServer` and wires:
|
||||
|
||||
- A `DriverHost` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs`) which registers drivers and holds the per-instance `IDriver` references.
|
||||
- One `DriverNodeManager` per registered driver (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`), constructed in `CreateMasterNodeManager`. Each manager owns its own namespace URI (`urn:OtOpcUa:{DriverInstanceId}`) and exposes the driver as a subtree under the standard `Objects` folder.
|
||||
- One `DriverNodeManager` per registered driver (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`), constructed in `CreateMasterNodeManager`. Each manager owns its own namespace URI (`urn:OtOpcUa:{DriverInstanceId}`) and exposes the driver as a subtree under the standard `Objects` folder.
|
||||
- A `CapabilityInvoker` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs`) per driver instance, keyed on `(DriverInstanceId, HostName, DriverCapability)` against the shared `DriverResiliencePipelineBuilder`. Every Read/Write/Discovery/Subscribe/HistoryRead/AlarmSubscribe call on the driver flows through this invoker so the Polly pipeline (retry / timeout / breaker / bulkhead) applies. The OTOPCUA0001 Roslyn analyzer enforces the wrapping at compile time.
|
||||
- An `IUserAuthenticator` (LDAP in production, injected stub in tests) for `UserName` token validation in the `ImpersonateUser` hook.
|
||||
- Optional `AuthorizationGate` + `NodeScopeResolver` (Phase 6.2) that sit in front of every dispatch call. In lax mode the gate passes through when the identity lacks LDAP groups so existing integration tests keep working; strict mode (`Authorization:StrictMode = true`) denies those cases.
|
||||
@@ -50,7 +50,7 @@ The host name fed to the invoker comes from `IPerCallHostResolver.ResolveHost(fu
|
||||
|
||||
## Redundancy
|
||||
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyCoordinator` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Redundancy/`). Standard OPC UA redundancy nodes (`Server/ServerRedundancy/RedundancySupport`, `ServerUriArray`, `Server/ServiceLevel`) are populated on startup; `ServiceLevel` recomputes whenever any driver's `DriverHealth` changes. The apply-lease mechanism prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
`Redundancy.Enabled = true` on the `ServerInstance` activates the `RedundancyStateActor` + `ServiceLevelCalculator` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/Redundancy/`). Standard OPC UA redundancy nodes (`Server/ServerRedundancy/RedundancySupport`, `ServerUriArray`, `Server/ServiceLevel`) are populated on startup; `ServiceLevel` recomputes whenever any driver's `DriverHealth` changes. The apply-lease mechanism prevents two instances from concurrently applying a generation. See `docs/Redundancy.md`.
|
||||
|
||||
## Server class hierarchy
|
||||
|
||||
@@ -79,10 +79,11 @@ Certificate stores default to `%LOCALAPPDATA%\OPC Foundation\pki\` (directory-ba
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs` — `StandardServer` subclass + `ImpersonateUser` hook
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — per-driver `CustomNodeManager2` + dispatch surface
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaSdkServer.cs` — `StandardServer` subclass
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` — programmatic `ApplicationConfiguration` + lifecycle + `ImpersonateUser` hook
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — SDK node manager + write-only address-space sink
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/SdkAddressSpaceSink.cs` — `IOpcUaAddressSpaceSink` adapter the actor system pushes into
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — per-driver discovery + dispatch surface
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs` — driver registration
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — Polly pipeline entry point
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — Phase 6.2 permission trie + evaluator
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` — stack-to-evaluator bridge
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`)
|
||||
|
||||
+7
-4
@@ -9,10 +9,13 @@ The project was originally called **LmxOpcUa** (a single-driver Galaxy/MXAccess
|
||||
|
||||
## Platform overview
|
||||
|
||||
- **Core** owns the OPC UA stack, address space, session/security/subscription machinery.
|
||||
> **v2 (2026-05-26):** the separate `OtOpcUa.Server` + `OtOpcUa.Admin` services fused into a single role-gated `OtOpcUa.Host` binary, joined by an Akka.NET cluster. See [v2 design](plans/2026-05-26-akka-hosting-alignment-design.md) for the architectural decision.
|
||||
|
||||
- **Core** owns shared abstractions (driver capability contracts, scripting, virtual tags, alarm historian).
|
||||
- **Drivers** plug in via capability interfaces in `ZB.MOM.WW.OtOpcUa.Core.Abstractions`: `IDriver`, `IReadable`, `IWritable`, `ITagDiscovery`, `ISubscribable`, `IHostConnectivityProbe`, `IAlarmSource`, `IHistoryProvider`, `IPerCallHostResolver`. Each driver opts into whichever it supports.
|
||||
- **Server** is the OPC UA endpoint process (net10, AnyCPU). Hosts every driver in-process. The Galaxy driver reaches MXAccess via gRPC to a separately-installed **mxaccessgw** sidecar (sibling repo); it is no longer hosted from this repo.
|
||||
- **Admin** is the Blazor Server operator UI (net10, x64). Owns the Config DB draft/publish flow, ACL + role-grant authoring, fleet status + `/metrics` scrape endpoint.
|
||||
- **Host** (`src/Server/ZB.MOM.WW.OtOpcUa.Host`) is the single fused binary (.NET 10, AnyCPU). `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + control-plane singletons), `driver` (OPC UA endpoint + per-node actors), or both. See [ServiceHosting.md](ServiceHosting.md).
|
||||
- **Cluster + ControlPlane + Runtime + AdminUI + Security** sit between Core and Host. The cluster glues per-node actors into one logical fleet; the control-plane singletons (deploy coordinator, audit writer, redundancy state) live on the admin role-leader. See [Redundancy.md](Redundancy.md).
|
||||
- The Galaxy driver still reaches MXAccess via gRPC to a separately-installed **mxaccessgw** sidecar (sibling repo).
|
||||
|
||||
## Where to find what
|
||||
|
||||
@@ -56,7 +59,7 @@ For Modbus / S7 / AB CIP / AB Legacy / TwinCAT / FOCAS / OPC UA Client specifics
|
||||
| [security.md](security.md) | Transport security profiles, LDAP auth, ACL trie, role grants, OTOPCUA0001 analyzer |
|
||||
| [Redundancy.md](Redundancy.md) | `RedundancyCoordinator`, `ServiceLevelCalculator`, apply-lease, Prometheus metrics |
|
||||
| [Reservations.md](Reservations.md) | Fleet-wide ZTag / SAPID external-ID reservations — publish-time claim, release flow |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Two-process deploy (Server + Admin) install/uninstall, plus the optional `OtOpcUaWonderwareHistorian` sidecar |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Single fused `OtOpcUa.Host` binary install/uninstall with `OTOPCUA_ROLES` gating, plus the optional `OtOpcUaWonderwareHistorian` sidecar |
|
||||
| [StatusDashboard.md](StatusDashboard.md) | Pointer — superseded by [v2/admin-ui.md](v2/admin-ui.md) |
|
||||
|
||||
### Client tooling
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Read/Write Operations
|
||||
|
||||
`DriverNodeManager` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs`) wires the OPC UA stack's per-variable `OnReadValue` and `OnWriteValue` hooks to each driver's `IReadable` and `IWritable` capabilities. Every dispatch flows through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers.
|
||||
`GenericDriverNodeManager` (`src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs`) wires the OPC UA stack's per-variable `OnReadValue` and `OnWriteValue` hooks to each driver's `IReadable` and `IWritable` capabilities. Every dispatch flows through `CapabilityInvoker` so the Polly pipeline (retry / timeout / breaker / bulkhead) applies uniformly across Galaxy, Modbus, S7, AB CIP, AB Legacy, TwinCAT, FOCAS, and OPC UA Client drivers.
|
||||
|
||||
## Driver vs virtual dispatch
|
||||
|
||||
@@ -60,8 +60,7 @@ Per decision #12, exceptions in the driver's capability call are logged and conv
|
||||
|
||||
## Key source files
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — `OnReadValue` / `OnWriteValue` hooks
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/WriteAuthzPolicy.cs` — classification-to-role policy
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` — Phase 6.2 trie gate
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — `OnReadValue` / `OnWriteValue` hooks
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/` — permission trie + evaluator (`PermissionTrie`, `PermissionTrieCache`, `TriePermissionEvaluator`) that gates Read/Write/Subscribe per the session's resolved LDAP groups
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs` — `ExecuteAsync` / `ExecuteWriteAsync`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IReadable.cs`, `IWritable.cs`, `WriteIdempotentAttribute.cs`
|
||||
|
||||
+78
-72
@@ -1,103 +1,109 @@
|
||||
# Redundancy
|
||||
# Redundancy (v2)
|
||||
|
||||
## Overview
|
||||
|
||||
OtOpcUa supports OPC UA **non-transparent** warm/hot redundancy. Two (or more) OtOpcUa Server processes run side-by-side, share the same Config DB, the same driver backends (Galaxy ZB, MXAccess runtime, remote PLCs), and advertise the same OPC UA node tree. Each process owns a distinct `ApplicationUri`; OPC UA clients see both endpoints via the standard `ServerUriArray` and pick one based on the `ServiceLevel` that each server publishes.
|
||||
OtOpcUa supports OPC UA **non-transparent** warm/hot redundancy. Two or more `OtOpcUa.Host` processes run side-by-side, share the same Config DB, and join the same Akka.NET cluster. Each process owns a distinct `ApplicationUri`; OPC UA clients discover both endpoints by reading `Server.ServerArray` (NodeId `i=2254`) on either node and pick one based on the `ServiceLevel` byte that each server publishes.
|
||||
|
||||
The redundancy surface lives in `src/Server/ZB.MOM.WW.OtOpcUa.Server/Redundancy/`:
|
||||
> **Discovery surface.** The `ServerArray` path on the `Server` object is what each node populates with self + peer `ApplicationUri`s — see `OpcUaApplicationHost.PopulateServerArray` and the per-node `PeerApplicationUris` option below. The redundancy-object-type `ServerUriArray` proper (a child of `Server.ServerRedundancy`) remains deferred pending an SDK object-type upgrade; clients should read `Server.ServerArray` for peer discovery today.
|
||||
|
||||
| Class | Role |
|
||||
|---|---|
|
||||
| `RedundancyCoordinator` | Process-singleton; owns the current `RedundancyTopology` loaded from the `ClusterNode` table. `RefreshAsync` re-reads after `sp_PublishGeneration` so operator role swaps take effect without a process restart. CAS-style swap (`Interlocked.Exchange`) means readers always see a coherent snapshot. |
|
||||
| `RedundancyTopology` | Immutable `(ClusterId, Self, Peers, ServerUriArray, ValidityFlags)` snapshot. |
|
||||
| `ApplyLeaseRegistry` | Tracks in-progress `sp_PublishGeneration` apply leases keyed on `(ConfigGenerationId, PublishRequestId)`. `await using` the disposable scope guarantees every exit path (success / exception / cancellation) decrements the lease; a stale-lease watchdog force-closes any lease older than `ApplyMaxDuration` (default 10 minutes) so a crashed publisher can't pin the node at `PrimaryMidApply`. |
|
||||
| `PeerReachabilityTracker` | Maintains last-known reachability for each peer node over two independent probes — OPC UA ping and HTTP `/healthz`. Both must succeed for `peerReachable = true`. |
|
||||
| `RecoveryStateManager` | Gates transitions out of the `Recovering*` bands; requires dwell + publish-witness satisfaction before allowing a return to nominal. |
|
||||
| `ServiceLevelCalculator` | Pure function `(role, selfHealthy, peerUa, peerHttp, applyInProgress, recoveryDwellMet, topologyValid, operatorMaintenance) → byte`. |
|
||||
| `RedundancyStatePublisher` | Orchestrates inputs into the calculator, pushes the resulting byte to the OPC UA `ServiceLevel` variable via an edge-triggered `OnStateChanged` event, and fires `OnServerUriArrayChanged` when the topology's `ServerUriArray` shifts. |
|
||||
> **v2 change.** v1's operator-managed `ClusterNode.RedundancyRole` column + `RedundancyCoordinator` / `ApplyLeaseRegistry` / `PeerHttpProbeLoop` are gone. Primary/secondary is now derived from **Akka cluster role-leader** for the `driver` role. The operator no longer writes a role into the DB; cluster topology + health drive ServiceLevel automatically.
|
||||
|
||||
## Data model
|
||||
The runtime pieces live in:
|
||||
|
||||
Per-node redundancy state lives in the Config DB `ClusterNode` table (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Entities/ClusterNode.cs`):
|
||||
|
||||
| Column | Role |
|
||||
|---|---|
|
||||
| `NodeId` | Unique node identity; matches `Node:NodeId` in the server's bootstrap `appsettings.json`. |
|
||||
| `ClusterId` | Foreign key into `ServerCluster`. |
|
||||
| `RedundancyRole` | `Primary`, `Secondary`, or `Standalone` (`RedundancyRole` enum in `Configuration/Enums`). |
|
||||
| `ServiceLevelBase` | Per-node base value used to bias nominal ServiceLevel output. |
|
||||
| `ApplicationUri` | Unique-per-node OPC UA ApplicationUri advertised in endpoint descriptions. |
|
||||
|
||||
`ServerUriArray` is derived from the set of peer `ApplicationUri` values at topology-load time and republished when the topology changes.
|
||||
|
||||
## ServiceLevel matrix
|
||||
|
||||
`ServiceLevelCalculator` produces one of the following bands (see `ServiceLevelBand` enum in the same file):
|
||||
|
||||
| Band | Byte | Meaning |
|
||||
| Component | Project | Role |
|
||||
|---|---|---|
|
||||
| `Maintenance` | 0 | Operator-declared maintenance. |
|
||||
| `NoData` | 1 | Self-reported unhealthy (`/healthz` fails). |
|
||||
| `InvalidTopology` | 2 | More than one Primary detected; both nodes self-demote. |
|
||||
| `RecoveringBackup` | 30 | Backup post-fault, dwell not met. |
|
||||
| `BackupMidApply` | 50 | Backup inside a publish-apply window. |
|
||||
| `IsolatedBackup` | 80 | Primary unreachable; Backup says "take over if asked" — does **not** auto-promote (non-transparent model). |
|
||||
| `AuthoritativeBackup` | 100 | Backup nominal. |
|
||||
| `RecoveringPrimary` | 180 | Primary post-fault, dwell not met. |
|
||||
| `PrimaryMidApply` | 200 | Primary inside a publish-apply window. |
|
||||
| `IsolatedPrimary` | 230 | Primary with unreachable peer, retains authority. |
|
||||
| `AuthoritativePrimary` | 255 | Primary nominal. |
|
||||
| `ServiceLevelCalculator` | `OtOpcUa.ControlPlane.Redundancy` | Pure function `(NodeHealthInputs) → byte`. No side effects. |
|
||||
| `RedundancyStateActor` | `OtOpcUa.ControlPlane.Redundancy` | Admin-role cluster singleton; subscribes to cluster topology events, debounces 250ms, broadcasts `RedundancyStateChanged` on the `redundancy-state` DPS topic. |
|
||||
| `DbHealthProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; runs `SELECT 1` against ConfigDb every 5s. Read by health endpoint + redundancy calc. |
|
||||
| `PeerOpcUaProbeActor` | `OtOpcUa.Runtime.Health` | Per-node; pings peer `opc.tcp://peer:4840` (real probe call is staged for follow-up F12). |
|
||||
| `ClusterRoleInfo` | `OtOpcUa.Cluster` | Live view of cluster membership + role-leader; exposes `IClusterRoleInfo` to the rest of the host. |
|
||||
|
||||
The reserved bands (0 Maintenance, 1 NoData, 2 InvalidTopology) take precedence over operational states per OPC UA Part 5 §6.3.34. Operational values occupy 2..255 so spec-compliant clients that treat "<3 = unhealthy" keep working.
|
||||
## ServiceLevel tiers (Part 5 §6.5)
|
||||
|
||||
Standalone nodes (single-instance deployments) report `AuthoritativePrimary` when healthy and `PrimaryMidApply` during publish.
|
||||
`ServiceLevelCalculator.Compute(NodeHealthInputs)` returns a byte in 0..255 by tier:
|
||||
|
||||
## Publish fencing and split-brain prevention
|
||||
| Tier | Byte | Condition |
|
||||
|---|---|---|
|
||||
| Down | 0 | Member status is not `Up` or `Joining` (leaving, removed, exiting). |
|
||||
| Critically degraded | 100 | ConfigDb unreachable AND data is stale. |
|
||||
| Stale | 200 | Data stale but ConfigDb reachable. |
|
||||
| Healthy follower | 240 | DB ok + OPC UA probe ok + not stale. |
|
||||
| Healthy leader | 250 | Healthy + this node is the `driver` role-leader. |
|
||||
|
||||
Any Admin-triggered `sp_PublishGeneration` acquires an apply lease through `ApplyLeaseRegistry.BeginApplyLease`. While the lease is held:
|
||||
Drivers write their computed byte into the OPC UA `ServiceLevel` Variable on each refresh. Clients with the standard redundancy heuristic ("pick the highest ServiceLevel") therefore prefer the role-leader and fall back to followers on its degradation.
|
||||
|
||||
- The calculator reports `PrimaryMidApply` / `BackupMidApply` — clients see the band shift and cut over to the unaffected peer rather than racing against a half-applied generation.
|
||||
- `RedundancyCoordinator.RefreshAsync` is called at the end of the apply window so the post-publish topology becomes visible exactly once, atomically.
|
||||
- The watchdog force-closes any lease older than `ApplyMaxDuration`; a stuck publisher therefore cannot strand a node at `PrimaryMidApply`.
|
||||
## Data flow
|
||||
|
||||
Because role transitions are **operator-driven** (write `RedundancyRole` in the Config DB + publish), the Backup never auto-promotes. An `IsolatedBackup` at 80 is the signal that the operator should intervene; auto-failover is intentionally out of scope for the non-transparent model (decision #154).
|
||||
```
|
||||
Cluster topology event ──┐
|
||||
DB health probe ─────────┤
|
||||
OPC UA peer probe ───────┤
|
||||
▼
|
||||
RedundancyStateActor (admin singleton)
|
||||
│ debounce 250ms
|
||||
▼
|
||||
DPS topic "redundancy-state"
|
||||
│
|
||||
▼
|
||||
Driver nodes' OpcUaPublishActor
|
||||
│
|
||||
▼
|
||||
ServiceLevelCalculator → byte
|
||||
│
|
||||
▼
|
||||
OPC UA ServiceLevel Variable
|
||||
```
|
||||
|
||||
## Metrics
|
||||
The admin singleton is the cluster's only `RedundancyStateActor`. If the admin leader fails over, the new admin node spins up its replacement, re-subscribes to cluster events, and publishes a fresh snapshot from the current `Cluster.State`. There is no DB-persisted state to recover.
|
||||
|
||||
`RedundancyMetrics` in `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/RedundancyMetrics.cs` registers the `ZB.MOM.WW.OtOpcUa.Redundancy` meter on the Admin process. Instruments:
|
||||
## Configuration
|
||||
|
||||
| Name | Kind | Tags | Description |
|
||||
|---|---|---|---|
|
||||
| `otopcua.redundancy.role_transition` | Counter<long> | `cluster.id`, `node.id`, `from_role`, `to_role` | Incremented every time `FleetStatusPoller` observes a `RedundancyRole` change on a `ClusterNode` row. |
|
||||
| `otopcua.redundancy.primary_count` | ObservableGauge<long> | `cluster.id` | Primary-role nodes per cluster — should be exactly 1 in nominal state. |
|
||||
| `otopcua.redundancy.secondary_count` | ObservableGauge<long> | `cluster.id` | Secondary-role nodes per cluster. |
|
||||
| `otopcua.redundancy.stale_count` | ObservableGauge<long> | `cluster.id` | Nodes whose `LastSeenAt` exceeded the stale threshold. |
|
||||
Per-node identity comes from `appsettings.json` + the `OTOPCUA_ROLES` env var:
|
||||
|
||||
Admin `Program.cs` wires OpenTelemetry to the Prometheus exporter when `Metrics:Prometheus:Enabled=true` (default), exposing the meter under `/metrics`. The endpoint is intentionally unauthenticated — fleet conventions put it behind a reverse-proxy basic-auth gate if needed.
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Real-time notifications (Admin UI)
|
||||
```
|
||||
OTOPCUA_ROLES=admin,driver
|
||||
```
|
||||
|
||||
`FleetStatusPoller` in `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Hubs/` polls the `ClusterNode` table, records role transitions, updates `RedundancyMetrics.SetClusterCounts`, and pushes a `RoleChanged` SignalR event onto `FleetStatusHub` when a transition is observed. `RedundancyTab.razor` subscribes with `_hub.On<RoleChangedMessage>("RoleChanged", …)` so connected Admin sessions see role swaps the moment they happen.
|
||||
Both nodes share the same `ConfigDb` connection string; `Cluster.PublicHostname` + `Roles` are what makes them distinct in cluster gossip. The first node bootstraps the cluster (its address goes in `SeedNodes`); the second node joins via the same `SeedNodes` list.
|
||||
|
||||
## Configuring a redundant pair
|
||||
There is no longer a `Node:NodeId` setting, no `ClusterNode.RedundancyRole`, no `ServiceLevelBase`. NodeId is derived as `host:port` of the cluster `PublicHostname` (see `ClusterRoleInfo.LocalNode` for the formula).
|
||||
|
||||
Redundancy is configured **in the Config DB, not appsettings.json**. The fields that must differ between the two instances:
|
||||
### Peer URI advertising
|
||||
|
||||
| Field | Location | Instance 1 | Instance 2 |
|
||||
|---|---|---|---|
|
||||
| `NodeId` | `appsettings.json` `Node:NodeId` (bootstrap) | `node-a` | `node-b` |
|
||||
| `ClusterNode.ApplicationUri` | Config DB | `urn:node-a:OtOpcUa` | `urn:node-b:OtOpcUa` |
|
||||
| `ClusterNode.RedundancyRole` | Config DB | `Primary` | `Secondary` |
|
||||
| `ClusterNode.ServiceLevelBase` | Config DB | typically 255 | typically 100 |
|
||||
Each node advertises its partner via `OpcUaApplicationHostOptions.PeerApplicationUris` (an `IList<string>`, default empty). `OpcUaApplicationHost.PopulateServerArray` appends each configured peer URI to the SDK's `IServerInternal.ServerUris` string table after server startup, so that `Server.ServerArray` reads served by `OnReadServerArray` return both self + peers. Set this per-node in `appsettings.json`:
|
||||
|
||||
Shared between instances: `ClusterId`, Config DB connection string, published generation, cluster-level ACLs, UNS hierarchy, driver instances.
|
||||
```json
|
||||
{
|
||||
"OpcUaServer": {
|
||||
"PeerApplicationUris": ["urn:node-b:OtOpcUa"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Role swaps, stand-alone promotions, and base-level adjustments all happen through the Admin UI `RedundancyTab` — the operator edits the `ClusterNode` row in a draft generation and publishes. `RedundancyCoordinator.RefreshAsync` picks up the new topology without a process restart.
|
||||
Node A lists Node B's `ApplicationUri` and vice-versa. Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` — boots two `OpcUaApplicationHost` instances on loopback, asserts a real OPCFoundation client `Session` reading `Server.ServerArray` from Node A sees both URIs.
|
||||
|
||||
## Split-brain
|
||||
|
||||
`akka.conf` configures Akka's split-brain resolver with `active-strategy = keep-oldest`, `stable-after = 15s`, and `failure-detector.threshold = 10.0`. Under a clean partition: the oldest member stays up + the smaller (or younger) side downs itself within ~15 seconds. The `RedundancyStateActor` on the surviving partition re-computes from the post-partition `Cluster.State`.
|
||||
|
||||
There is no operator-driven role swap during a partition. Failover is what the cluster does automatically.
|
||||
|
||||
## Client-side failover
|
||||
|
||||
The OtOpcUa Client CLI at `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` supports `-F` / `--failover-urls` for automatic client-side failover; for long-running subscriptions the CLI monitors session KeepAlive and reconnects to the next available server, recreating the subscription on the new endpoint. See [`Client.CLI.md`](Client.CLI.md) for the command reference.
|
||||
The OtOpcUa Client CLI at `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` supports `-F` / `--failover-urls` for automatic client-side failover; for long-running subscriptions the CLI monitors session KeepAlive and reconnects to the next available server, recreating the subscription on the new endpoint. See [`Client.CLI.md`](Client.CLI.md).
|
||||
|
||||
## Depth reference
|
||||
|
||||
For the full decision trail and implementation plan — topology invariants, peer-probe cadence, recovery-dwell policy, compliance-script guard against enum-value drift — see `docs/v2/plan.md` §Phase 6.3.
|
||||
For the full design — message contracts, tiered calculator truth table, recovery semantics — see `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §6.
|
||||
|
||||
@@ -111,13 +111,13 @@ Emissions map into `AlarmEventArgs` as `AlarmType = Kind.ToString()`, `SourceNod
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7EngineComposer.Compose` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs`) is the single call site that instantiates the engine. It takes the generation's `Script` / `VirtualTag` / `ScriptedAlarm` rows, the shared `CachedTagUpstreamSource`, an `IAlarmStateStore`, and an `IAlarmHistorianSink`, and returns a `Phase7ComposedSources` the caller owns. When `scriptedAlarms.Count > 0`:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) is the single call site that instantiates the engine. It takes the generation's `Script` / `VirtualTag` / `ScriptedAlarm` rows, the shared upstream-tag source, an `IAlarmStateStore`, and an `IAlarmHistorianSink`, and returns the composed sources the caller owns. When `scriptedAlarms.Count > 0`:
|
||||
|
||||
1. `ProjectScriptedAlarms` resolves each row's `PredicateScriptId` against the script dictionary and produces a `ScriptedAlarmDefinition` list. Unknown or disabled scripts throw immediately — the DB publish guarantees referential integrity but this is a belt-and-braces check.
|
||||
2. A `ScriptedAlarmEngine` is constructed with the upstream source, the store, a shared `ScriptLoggerFactory` keyed to `scripts-*.log`, and the root Serilog logger.
|
||||
3. `alarmEngine.OnEvent` is wired to `RouteToHistorianAsync`, which projects each emission into an `AlarmHistorianEvent` and enqueues it on the sink. Fire-and-forget — the SQLite store-and-forward sink is already non-blocking.
|
||||
4. `LoadAsync(alarmDefs)` runs synchronously on the startup thread: it compiles every predicate, subscribes to the union of predicate inputs and message-template tokens, seeds the value cache, loads persisted state, re-derives `ActiveState` from a fresh predicate evaluation, and starts the 5s shelving timer. Compile failures are aggregated into one `InvalidOperationException` so operators see every bad predicate in one startup log line rather than one at a time.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream, and a `ScriptedAlarmReadable` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/ScriptedAlarmReadable.cs`) is created for OPC UA variable reads on the alarm's active-state node (task #245) — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
5. A `ScriptedAlarmSource` is created for the event stream; the v2 `ScriptedAlarmActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`) owns the active-state surface for OPC UA variable reads on the alarm's active-state node (task #245) — unknown alarm ids return `BadNodeIdUnknown` rather than silently reading `false`.
|
||||
|
||||
Both engine and source are added to `Phase7ComposedSources.Disposables`, which `Phase7Composer` disposes on server shutdown.
|
||||
|
||||
@@ -132,5 +132,7 @@ Both engine and source are added to `Phase7ComposedSources.Disposables`, which `
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/AlarmTypes.cs` — `AlarmKind` + the four Part 9 enums
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/MessageTemplate.cs` — `{path}` placeholder resolver
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/IAlarmStateStore.cs` — persistence contract + `InMemoryAlarmStateStore` default
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs` — composition, config-row projection, historian routing
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/ScriptedAlarmReadable.cs` — `IReadable` adapter exposing `ActiveState` to OPC UA variable reads
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — composition, config-row projection, historian routing
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed Phase 7 plan into the SDK node manager
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs` — actor wrapper owning the alarm state machine and exposing `ActiveState` for OPC UA variable reads
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynScriptedAlarmEvaluator.cs` — production Roslyn predicate evaluator
|
||||
|
||||
+65
-41
@@ -1,62 +1,86 @@
|
||||
# Service Hosting
|
||||
# Service Hosting (v2)
|
||||
|
||||
## Overview
|
||||
|
||||
A production OtOpcUa deployment runs **two or three processes**, each
|
||||
with a distinct runtime and install surface:
|
||||
A production OtOpcUa deployment runs **one binary per node**, plus the optional Wonderware historian sidecar:
|
||||
|
||||
| Process | Project | Runtime | Platform | Responsibility |
|
||||
|---|---|---|---|---|
|
||||
| **OtOpcUa Server** | `src/Server/ZB.MOM.WW.OtOpcUa.Server` | .NET 10 | x64 | Hosts the OPC UA endpoint; loads every driver in-process (Modbus, S7, AbCip, AbLegacy, TwinCAT, FOCAS, OPC UA Client, Galaxy via mxaccessgw); exposes `/healthz`. |
|
||||
| **OtOpcUa Admin** | `src/Server/ZB.MOM.WW.OtOpcUa.Admin` | .NET 10 (ASP.NET Core / Blazor Server) | x64 | Operator UI for Config DB editing + fleet status, SignalR hubs (`FleetStatusHub`, `AlertHub`), Prometheus `/metrics`. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x86 (32-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true` in `appsettings.json`. |
|
||||
| **OtOpcUa Host** | `src/Server/ZB.MOM.WW.OtOpcUa.Host` | .NET 10 | AnyCPU | Single fused binary. `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + auth + control-plane singletons), `driver` (OPC UA endpoint + per-driver actors), or both. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x86 (32-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over a named pipe. Required only when `Historian:Wonderware:Enabled=true`. |
|
||||
|
||||
Galaxy access uses a separately-installed **mxaccessgw** running out
|
||||
of a sibling repo (`c:\Users\dohertj2\Desktop\mxaccessgw\`) — see
|
||||
`docs/v2/Galaxy.ParityRig.md` for setup. The mxaccessgw owns the
|
||||
MXAccess COM bitness constraint (its worker is x86 net48); nothing
|
||||
in the OtOpcUa repo carries that constraint anymore. PR 7.2 retired
|
||||
the legacy in-process `Galaxy.Host` / `Galaxy.Proxy` / `Galaxy.Shared`
|
||||
projects + the `OtOpcUaGalaxyHost` Windows service.
|
||||
Galaxy access still uses the separately-installed **mxaccessgw** sidecar (see `docs/v2/Galaxy.ParityRig.md`); the gateway owns the MXAccess COM bitness constraint (its worker is x86 net48). Nothing in the OtOpcUa repo carries that constraint anymore.
|
||||
|
||||
## OtOpcUa Server
|
||||
> **v2 change.** v1's separate `OtOpcUa.Server` + `OtOpcUa.Admin` Windows services merged into a single role-gated `OtOpcUa.Host` binary. Two installers became one (with a `-Roles` parameter). The whole DI graph is composed in `OtOpcUa.Host/Program.cs`; per-role wiring is conditional on the env var.
|
||||
|
||||
Hosted via `Microsoft.Extensions.Hosting` with `AddWindowsService`
|
||||
(decision #30 — replaced TopShelf in v2). The host's `Build()`
|
||||
returns immediately when launched interactively (e.g. `dotnet run`)
|
||||
but blocks for SCM signals when running as a Windows service.
|
||||
## Role gating
|
||||
|
||||
In-process drivers are registered at startup in `Program.cs`'s
|
||||
`DriverFactoryRegistry` block; the `DriverInstance` rows in the
|
||||
central Config DB select which driver factories materialise into
|
||||
live `IDriver` instances. See `docs/v2/driver-specs.md` for the
|
||||
per-driver `DriverConfig` JSON shapes.
|
||||
`Program.cs` reads `OTOPCUA_ROLES`, parses it with `RoleParser`, and conditionally registers services:
|
||||
|
||||
## OtOpcUa Admin
|
||||
| Role present | Wires |
|
||||
|---|---|
|
||||
| `admin` | `AddOtOpcUaAuth`, `AddAdminUI`, `AddSignalR`, `AddOtOpcUaAdminClients`, `MapOtOpcUaAuth`, `MapAdminUI<App>`, `MapOtOpcUaHubs`, `WithOtOpcUaControlPlaneSingletons` (5 admin singletons via `Akka.Hosting`) |
|
||||
| `driver` | `WithOtOpcUaRuntimeActors` (DriverHostActor + DbHealthProbeActor) — and the OPC UA endpoint on port 4840 |
|
||||
| Either / both | `AddOtOpcUaConfigDb`, `AddOtOpcUaCluster`, `AddOtOpcUaHealth` (`/health/ready`, `/health/active`, `/healthz`) |
|
||||
|
||||
Same hosting model; runs the Blazor Server UI + SignalR hubs.
|
||||
Reads from the same Config DB the Server writes to.
|
||||
Single-node dev: `OTOPCUA_ROLES=admin,driver`. Production: typically two admin nodes (HA pair) + N driver nodes.
|
||||
|
||||
### Per-role configuration overlays
|
||||
|
||||
`Program.cs:33-35` builds a role suffix by joining the parsed roles **alphabetically** with `-` and loads `appsettings.{roleSuffix}.json` as an optional overlay on top of base `appsettings.json`. Three overlays ship in `src/Server/ZB.MOM.WW.OtOpcUa.Host/`:
|
||||
|
||||
- `appsettings.admin.json` — admin-only nodes
|
||||
- `appsettings.driver.json` — driver-only nodes
|
||||
- `appsettings.admin-driver.json` — fused single-node dev / small deployments
|
||||
|
||||
All three carry Serilog log-level overrides + `Security:Ldap:DevStubMode = false`. Loading order is **base `appsettings.json` → role overlay (`appsettings.{role}.json`) → environment overlay (`appsettings.{Environment}.json`)** — later layers win. Overlays are optional; the base file boots a node on its own.
|
||||
|
||||
## Akka cluster
|
||||
|
||||
The host joins an Akka.NET cluster bound to the address in `appsettings.json::Cluster`:
|
||||
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `WithOtOpcUaClusterBootstrap` (in `OtOpcUa.Cluster`) loads the embedded HOCON (split-brain resolver, pinned dispatcher, failure detector tuning) and overlays remote endpoint + cluster options.
|
||||
- All cluster singletons + per-node actors live on this single ActorSystem — there is no second Akka instance.
|
||||
|
||||
See [Redundancy.md](Redundancy.md) for the role-leader + ServiceLevel story.
|
||||
|
||||
## Health endpoints
|
||||
|
||||
Both admin and driver nodes expose:
|
||||
|
||||
| Path | Status meaning |
|
||||
|---|---|
|
||||
| `/healthz` | Process alive. |
|
||||
| `/health/ready` | ConfigDb reachable + cluster member state is `Up`. |
|
||||
| `/health/active` | Admin-role leader (the node Traefik or an HA LB should route traffic to). |
|
||||
|
||||
Used by Traefik for the active-leader-only routing pattern (see [Task 63 traefik docs](v2/Architecture-v2.md) — TODO).
|
||||
|
||||
## OtOpcUa Wonderware Historian (optional)
|
||||
|
||||
When `Historian:Wonderware:Enabled=true`, the Server speaks to a
|
||||
sidecar that wraps the Wonderware Historian SDK (which is .NET
|
||||
Framework only). The pipe IPC contract is in
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/Contracts/`
|
||||
and the sidecar's pipe handler lives at
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Pipe/`.
|
||||
|
||||
Install via the `-InstallWonderwareHistorian` switch on
|
||||
`scripts/install/Install-Services.ps1`.
|
||||
Unchanged from v1. Pipe IPC contract lives in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/Contracts/`; sidecar pipe handler in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Pipe/`. Install via `scripts/install/Install-Services.ps1 -InstallWonderwareHistorian`.
|
||||
|
||||
## Install / Uninstall
|
||||
|
||||
- `scripts/install/Install-Services.ps1` — installs `OtOpcUa` and
|
||||
optionally `OtOpcUaWonderwareHistorian`.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes both,
|
||||
plus `OtOpcUaGalaxyHost` if a pre-7.2 rig still carries it.
|
||||
- `scripts/install/Install-Services.ps1 -Roles admin,driver` — installs `OtOpcUaHost`. v2 rewrite tracked as plan Task 62.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes the host service (and the historian sidecar if installed).
|
||||
|
||||
## Logging
|
||||
|
||||
Serilog with rolling-daily file sinks. Each service writes to
|
||||
`%ProgramData%\OtOpcUa\<service>-*.log` plus stdout (NSSM-friendly).
|
||||
Serilog with rolling-daily file sinks. Each host writes to `logs/otopcua-*.log` plus stdout (NSSM/systemd-friendly). Per-environment log level overrides go in `appsettings.{Environment}.json`.
|
||||
|
||||
## Depth reference
|
||||
|
||||
For the full host-architecture rationale (why fused vs. split, role-gating tradeoffs, multi-node deployment shapes), see `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §3-4.
|
||||
|
||||
+8
-8
@@ -107,13 +107,12 @@ Per [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) Option B,
|
||||
|
||||
`ITagUpstreamSource` and `IHistoryWriter` are the two ports the engine requires from its host. Both live in `Core.VirtualTags`. In the Server process:
|
||||
|
||||
- **`CachedTagUpstreamSource`** (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/CachedTagUpstreamSource.cs`) implements the interface (and the parallel `Core.ScriptedAlarms.ITagUpstreamSource` — identical shape, distinct namespace). A `ConcurrentDictionary<path, DataValueSnapshot>` cache. `Push(path, snapshot)` updates the cache and fans out synchronously to every observer. Reads of never-pushed paths return `BadNodeIdUnknown` quality (`UpstreamNotConfigured = 0x80340000`).
|
||||
- **`DriverSubscriptionBridge`** (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/DriverSubscriptionBridge.cs`) feeds the cache. For each registered `ISubscribable` driver it batches a single `SubscribeAsync` for every fullRef the script graph references, installs an `OnDataChange` handler that translates driver-opaque fullRefs back to UNS paths via a reverse map, and pushes each delta into `CachedTagUpstreamSource`. Unsubscribes on dispose. The bridge suppresses `OTOPCUA0001` (the Roslyn analyzer that requires `ISubscribable` callers to go through `CapabilityInvoker`) on the documented basis that this is a lifecycle wiring, not per-evaluation hot path.
|
||||
- **Upstream-tag feed.** In v2 the upstream-tag feed is provided by the actor system. `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) multiplexes driver `ISubscribable` subscriptions for every fullRef the script graph references, translating driver-opaque fullRefs back to UNS paths via a reverse map. Deltas land on `VirtualTagActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs`) as `DependencyValueChanged` messages; the actor's in-memory cache serves the engine's synchronous `GetTag` reads. Reads of never-pushed paths return `BadNodeIdUnknown` quality (`UpstreamNotConfigured = 0x80340000`).
|
||||
- **`IHistoryWriter`** — no production implementation is currently wired for virtual tags; `VirtualTagEngine` gets `NullHistoryWriter` by default from `Phase7EngineComposer`.
|
||||
|
||||
## Composition
|
||||
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs`) is an `IAsyncDisposable` injected into `OpcUaServerService`:
|
||||
`Phase7Composer` (`src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`) projects the published generation into a `Phase7Plan` that `Phase7Applier` applies to the running SDK node manager:
|
||||
|
||||
1. `PrepareAsync(generationId, ct)` — called after the bootstrap generation loads and before `OpcUaApplicationHost.StartAsync`. Reads the `Script` / `VirtualTag` / `ScriptedAlarm` rows for that generation from the config DB (`OtOpcUaConfigDbContext`). Empty-config fast path returns `Phase7ComposedSources.Empty`.
|
||||
2. Constructs a `CachedTagUpstreamSource` + hands it to `Phase7EngineComposer.Compose`.
|
||||
@@ -145,8 +144,9 @@ Definition reload on config publish: `VirtualTagEngine.Load` is re-entrant — a
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/ITagUpstreamSource.cs` — driver-tag read + subscribe port
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/IHistoryWriter.cs` — historize sink port + `NullHistoryWriter`
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/VirtualTagSource.cs` — `IReadable` + `ISubscribable` adapter
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/CachedTagUpstreamSource.cs` — production `ITagUpstreamSource`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/DriverSubscriptionBridge.cs` — driver `ISubscribable` → cache feed
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7EngineComposer.cs` — row projection + engine instantiation
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs` — lifecycle host: load rows, compose, wire bridge
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs` — `SelectReadable` + `IsWriteAllowedBySource` dispatch kernel
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/VirtualTagActor.cs` — actor wrapper that owns per-instance state and the synchronous read cache
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs` — driver `ISubscribable` → actor feed (replaces the v1 `DriverSubscriptionBridge`)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/RoslynVirtualTagEvaluator.cs` — production Roslyn evaluator wired into the actor
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs` — row projection + engine instantiation (`Phase7Plan` composer)
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Applier.cs` — applies the composed plan into the SDK node manager
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/GenericDriverNodeManager.cs` — driver-vs-virtual dispatch kernel
|
||||
|
||||
@@ -136,9 +136,10 @@ ConditionType events (non-base `BaseEventType`) is not verified.
|
||||
## Follow-up candidates
|
||||
|
||||
The easiest win here is to **wire the client driver tests against this
|
||||
repo's own server**. The integration test project
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs`
|
||||
already stands up a real OPC UA server on a non-default port with a seeded
|
||||
repo's own server**. The v2 integration test project
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs`
|
||||
(the v2 replacement for the retired v1 `OpcUaServerIntegrationTests`) already
|
||||
stands up a real OPC UA server on a non-default port with a seeded
|
||||
FakeDriver. An `OpcUaClientLiveLoopbackTests` that connects the client
|
||||
driver to that server would give:
|
||||
|
||||
@@ -165,6 +166,6 @@ Beyond that:
|
||||
mocked `Session`
|
||||
- `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs` — ctor +
|
||||
session-factory seam tests mock through
|
||||
- `tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/OpcUaServerIntegrationTests.cs` —
|
||||
the server-side integration harness a future loopback client test could
|
||||
piggyback on
|
||||
- `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs` —
|
||||
the v2 dual-endpoint integration harness a future loopback client test could
|
||||
piggyback on (v1 `OpcUaServerIntegrationTests.cs` retired with the v1 server project)
|
||||
|
||||
@@ -0,0 +1,451 @@
|
||||
# OtOpcUa v2 — Akka.NET + Fused Hosting Alignment with ScadaLink
|
||||
|
||||
**Status:** Design approved, ready for implementation planning
|
||||
**Date:** 2026-05-26
|
||||
**Branch:** `v2-akka-fuse`
|
||||
**Sister project reference:** `~/Desktop/scadalink-design` (ScadaLink)
|
||||
|
||||
## 1. Motivation
|
||||
|
||||
OtOpcUa today runs as three separate processes (`OtOpcUa.Server` OPC UA host, `OtOpcUa.Admin` Blazor Server web UI, optional `OtOpcUaWonderwareHistorian` Framework sidecar) with manual operator-driven warm-redundancy failover. The sister project ScadaLink — owned by the same developer — solved similar problems with a fused single-binary, role-gated hosting model on top of an Akka.NET cluster.
|
||||
|
||||
The motivation for this refactor is twofold:
|
||||
|
||||
1. **Consistency.** A single developer (the project owner) moves between OtOpcUa and ScadaLink frequently. Sharing patterns — hosting, auth, actor hierarchy, deployment model — reduces cognitive overhead and makes fixes portable.
|
||||
2. **Real HA improvements.** Upgrade OtOpcUa's manual operator-driven failover to automatic, Akka-cluster-driven failover with Traefik routing for the web UI. Preserve OPC UA dual-endpoint client-side failover semantics (clients connect to both nodes and pick based on `ServiceLevel`), now driven automatically by Akka cluster leadership.
|
||||
|
||||
## 2. Architecture overview
|
||||
|
||||
**One binary, role-gated.** `OtOpcUa.Host` (Microsoft.NET.Sdk.Web, .NET 10) replaces `OtOpcUa.Server` and `OtOpcUa.Admin`. Same binary on every node. Role configured via `OTOPCUA_ROLES` environment variable.
|
||||
|
||||
**Two Akka roles, single cluster:**
|
||||
|
||||
- **`admin`** — hosts Blazor web UI + cluster singletons. Singletons pinned via `ClusterSingletonManagerSettings.WithRole("admin")`. Traefik routes `/` to whichever Admin-role node `/health/active` reports as leader.
|
||||
- **`driver`** — hosts OPC UA endpoint + per-node `DriverHostActor` hierarchy. Every Driver-role node always serves OPC UA; `ServiceLevel` computed by `RedundancyStateActor` is broadcast back to each Driver node and used to publish to the local OPC UA address space.
|
||||
|
||||
Roles are additive: `OTOPCUA_ROLES=admin`, `OTOPCUA_ROLES=driver`, or `OTOPCUA_ROLES=admin,driver`. Small deployments run both roles on both nodes; larger deployments separate them.
|
||||
|
||||
**Per-role leadership.** `Cluster.Get(system).State.RoleLeader("driver")` drives OPC UA `ServiceLevel`. `RoleLeader("admin")` drives `/health/active` (Traefik routing). These are independent — admin and driver leadership can land on different nodes if separated.
|
||||
|
||||
**Cluster membership.** Both seed nodes; keep-oldest split-brain resolver; `down-if-alone = on`; 15s stable-after; 2s heartbeat / 10s threshold. CoordinatedShutdown for graceful singleton handover. Exact ScadaLink tuning.
|
||||
|
||||
**OPC UA dual-endpoint preserved.** Driver-role nodes all bind `opc.tcp://0.0.0.0:4840`. Clients still see N endpoints in `ServerUriArray` and fail over via `ServiceLevel`. OPC UA spec compliance unchanged from today.
|
||||
|
||||
**Mac dev:** role `admin,driver,dev` — `dev` short-circuits Windows-only driver registration (Galaxy, Wonderware) with explicit `[DEV-STUB]` log lines.
|
||||
|
||||
## 3. Project & process restructure
|
||||
|
||||
Single solution, ScadaLink-style folder layout. Existing OtOpcUa naming convention (`ZB.MOM.WW.OtOpcUa.*`) preserved.
|
||||
|
||||
### New entry point & deletions
|
||||
|
||||
| Action | Project |
|
||||
|---|---|
|
||||
| **New** | `OtOpcUa.Host` — `Microsoft.NET.Sdk.Web`, single Program.cs, role-gated startup, `AddWindowsService` |
|
||||
| **Delete** | `OtOpcUa.Server` (content migrates) |
|
||||
| **Delete** | `OtOpcUa.Admin` (UI moves to library) |
|
||||
|
||||
### New libraries
|
||||
|
||||
| Project | Owns | ScadaLink analog |
|
||||
|---|---|---|
|
||||
| `OtOpcUa.Commons` | Entity POCOs, interfaces, message contracts (`Types/`, `Interfaces/`, `Entities/`, `Messages/`) | `ScadaLink.Commons` |
|
||||
| `OtOpcUa.ConfigDb` | EF Core `DbContext`, repositories, `IAuditService`, migrations, Data Protection key store | `ScadaLink.ConfigurationDatabase` |
|
||||
| `OtOpcUa.Cluster` | Akka HOCON, `AkkaHostedService`, split-brain resolver config, role-aware membership helpers, `IClusterRoleInfo` | (split out of ScadaLink Host) |
|
||||
| `OtOpcUa.Security` | LDAP bind, cookie+JWT hybrid, JWT issuance, role mapping, `/auth/login`, `/auth/ping` endpoints | `ScadaLink.Security` |
|
||||
| `OtOpcUa.ControlPlane` | Cluster singletons: `ConfigPublishCoordinator`, `AdminOperationsActor`, `AuditWriterActor`, `FleetStatusBroadcaster`, `RedundancyStateActor` | `ScadaLink.ManagementService` |
|
||||
| `OtOpcUa.Runtime` | Per-node actors: `DriverHostActor`, `DriverInstanceActor`, `VirtualTagActor`, `ScriptedAlarmActor`, `OpcUaPublishActor`, `HistorianAdapterActor`, `PeerOpcUaProbeActor`, `DbHealthProbeActor` | `ScadaLink.SiteRuntime` |
|
||||
| `OtOpcUa.OpcUaServer` | OPC UA app host, address-space build, `Phase7Composer` extraction | (in ScadaLink.SiteRuntime/DCL) |
|
||||
| `OtOpcUa.AdminUI` | Blazor components, hubs (`FleetStatusHub`, `AlertHub`, `ScriptLogHub`), auth state provider, `MapAdminUI<TApp>()` | `ScadaLink.CentralUI` |
|
||||
|
||||
### Unchanged
|
||||
|
||||
- Driver projects (`OtOpcUa.Driver.Galaxy`, `.Modbus`, `.S7`, `.AbCip`, `.AbLegacy`, `.TwinCAT`, `.FOCAS`, `.OpcUaClient`) — still implement `IDriver`, now consumed by `DriverInstanceActor` instead of `DriverInstanceBootstrapper`.
|
||||
- `OtOpcUa.Driver.Historian.Wonderware` — .NET Framework 4.8 sidecar, named-pipe IPC, wrapped by a `HistorianAdapterActor` in `OtOpcUa.Runtime`.
|
||||
- `mxaccessgw` sibling repo — unchanged; Galaxy driver still talks gRPC to it.
|
||||
|
||||
### Tests
|
||||
|
||||
- `tests/OtOpcUa.Cluster.Tests` — split-brain, leadership transitions
|
||||
- `tests/OtOpcUa.ControlPlane.Tests` — singleton actor unit tests via Akka.TestKit
|
||||
- `tests/OtOpcUa.Runtime.Tests` — per-node actor tests, driver lifecycle
|
||||
- `tests/OtOpcUa.Security.Tests` — LDAP, cookie+JWT roundtrip
|
||||
- `tests/OtOpcUa.Host.IntegrationTests` — 2-node in-process cluster, deployment flow, failover, Mac-safe
|
||||
- `tests/OtOpcUa.OpcUa.IntegrationTests` — real OPCFoundation client against stubbed Host
|
||||
- `tests/OtOpcUa.E2E.Tests` — full stack with Traefik (nightly CI)
|
||||
|
||||
### Deploy
|
||||
|
||||
- `deploy/Install-Services.ps1` — installs one Windows Service per node (`OtOpcUaHost`), passes role via env var. Old script replaced.
|
||||
- `deploy/traefik/` — Windows Traefik config + service registration for the leader-routed `/health/active`.
|
||||
- `docker-dev/` (new, optional) — 2-node Mac dev compose with stubbed drivers + LDAP + SQL Server + Traefik.
|
||||
|
||||
Solution file: `OtOpcUa.slnx` (matches ScadaLink convention; switch from current `.sln`).
|
||||
|
||||
## 4. Actor hierarchy
|
||||
|
||||
### Per-node tree
|
||||
|
||||
Rooted under `OtOpcUa.Runtime`, one tree per Driver-role node:
|
||||
|
||||
```
|
||||
DriverHostActor (per-node coordinator, started by Host)
|
||||
├─ DriverInstanceActor (per DriverInstance row)
|
||||
│ └─ children = pooled or per-subscription work
|
||||
├─ VirtualTagActor (per VirtualTag row)
|
||||
├─ ScriptedAlarmActor (per ScriptedAlarm row)
|
||||
├─ OpcUaPublishActor (per-node bridge to OPCFoundation address space)
|
||||
├─ HistorianAdapterActor (per-node, wraps Wonderware named-pipe sidecar)
|
||||
├─ PeerOpcUaProbeActor (per-node, tests peer OPC UA stack health)
|
||||
└─ DbHealthProbeActor (per-node, cached DB health probe)
|
||||
```
|
||||
|
||||
### Cluster singletons
|
||||
|
||||
Pinned to `admin` role via `ClusterSingletonManagerSettings.WithRole("admin")`:
|
||||
|
||||
| Actor | Owns | Notes |
|
||||
|---|---|---|
|
||||
| `ConfigPublishCoordinator` | The deploy protocol. Writes `Deployment` row, broadcasts `DispatchDeployment(deploymentId)` via `DistributedPubSub` to every `DriverHostActor`, tracks apply ACKs per node. | Replaces `ApplyLeaseRegistry`. Resumes after failover by re-reading ConfigDb state — no Akka.Persistence. |
|
||||
| `AdminOperationsActor` | All mutating admin ops (CRUD on equipment, drivers, scripts, namespaces, ACLs). Wraps each in an audit envelope. | UI calls via `ClusterSingletonProxy` (in-process when UI is on Admin node). |
|
||||
| `AuditWriterActor` | Receives `AuditEvent` telemetry from any node, batch-inserts into `ConfigAuditLog`. | Idempotent on `EventId`. |
|
||||
| `FleetStatusBroadcaster` | Aggregates Akka cluster member events + per-node `DriverHostStatus` heartbeats. Publishes diffs to `IHubContext<FleetStatusHub>` and `IHubContext<AlertHub>`. | Push-driven; replaces today's 5s `FleetStatusPoller`. |
|
||||
| `RedundancyStateActor` | Subscribes to `ClusterEvent.IMemberEvent` + `ClusterEvent.LeaderChanged` + per-node health probes. Computes `ServiceLevel` byte + `ServerUriArray` per Driver node. Publishes to `DistributedPubSub` topic `redundancy-state`. | Source of truth for OPC UA redundancy. Local `OpcUaPublishActor` subscribes and writes to its OPCFoundation stack. |
|
||||
|
||||
### Supervision
|
||||
|
||||
| Actor | Strategy |
|
||||
|---|---|
|
||||
| `DriverHostActor` | `Resume` |
|
||||
| `DriverInstanceActor` | `Restart` with backoff (1s → 30s, ×1.5, jitter) |
|
||||
| `VirtualTagActor` | `Restart` with backoff |
|
||||
| `ScriptedAlarmActor` | `Restart` with backoff; preserve alarm state via `PreRestart` hook |
|
||||
| `OpcUaPublishActor` | `Resume` |
|
||||
| `HistorianAdapterActor` | `Restart` with backoff; SQLite store-and-forward buffers during pipe outage |
|
||||
| All singletons | `Resume`; resumable state in ConfigDb |
|
||||
| Script execution actors (short-lived) | `Stop` on failure |
|
||||
|
||||
### State machines
|
||||
|
||||
- `DriverInstanceActor` — Become/Stash for `Connecting → Connected → Reconnecting → Failed`. Bad-quality publish on disconnect; transparent re-subscribe on reconnect. Write failures returned synchronously via `Ask` from `OpcUaPublishActor`.
|
||||
- `ConfigPublishCoordinator` — `Idle → Publishing → AwaitingApplyAcks → Sealed`, with timeout-driven escalation if a node fails to ack within `ApplyMaxDuration` (default 10 min).
|
||||
- `RedundancyStateActor` — recomputes on every membership event, debounced 250ms to coalesce bursts.
|
||||
|
||||
### Communication conventions
|
||||
|
||||
- **Tell** for hot-path internal traffic (driver values, alarm state changes, publish broadcasts).
|
||||
- **Ask** only at system boundaries (UI controller → `AdminOperationsActor`, with explicit timeout + cancellation token).
|
||||
- **DistributedPubSub** for cluster-wide broadcasts (`DispatchDeployment`, `RedundancyStateChanged`, `FleetStatusChanged`).
|
||||
- Application-level **correlation IDs** on every request/response message.
|
||||
- Messages live in `OtOpcUa.Commons.Messages.{Drivers,Deploy,Admin,Audit,Redundancy}` — additive-only evolution.
|
||||
|
||||
### Singleton persistence
|
||||
|
||||
No Akka.Persistence. Each singleton reads its resumable state from `ConfigDb` on `PreStart` (e.g., `ConfigPublishCoordinator` reads the current in-flight `Deployment` row + per-node `NodeDeploymentState`) and writes on every state transition.
|
||||
|
||||
### Mac-dev stubs
|
||||
|
||||
`DevNode` role short-circuits driver registration. `DriverInstanceActor` for any Galaxy/Wonderware row enters a `Stubbed` Become state that returns deterministic test values. Logged at INFO with `[DEV-STUB] driver={Name} reason=windows-only`.
|
||||
|
||||
## 5. Web hosting, auth, and SignalR
|
||||
|
||||
### Kestrel startup gated by `admin` role
|
||||
|
||||
`Program.cs` builds `WebApplicationBuilder`, registers all services, but only calls `app.MapBlazor<App>()`, `app.MapHub<...>()`, `app.MapStaticAssets()`, and auth endpoints when `admin ∈ roles`. Driver-only nodes still bind Kestrel for `/healthz` on `:4841` and nothing else.
|
||||
|
||||
### Authentication — cookie+JWT hybrid
|
||||
|
||||
| Layer | Config |
|
||||
|---|---|
|
||||
| Cookie scheme | `OtOpcUa.Auth`, HttpOnly, SameSite=Strict, Secure (prod) / SameAsRequest (dev). Sliding 30-min idle timeout. |
|
||||
| Embedded JWT | HMAC-SHA256, 15-min expiry, claims = `sub`, `roles`, `nodeAcls`. |
|
||||
| LDAP bind | `LdapAuthService.AuthenticateAsync(user, pw)` at `/auth/login` POST — preserved from current `OtOpcUa.Admin/Security`. |
|
||||
| Role mapping | `RoleMapper.MapGroupsToRolesAsync()` — LDAP groups → `FleetAdmin` / `ConfigEditor` / `ReadOnly`. Stays as-is. |
|
||||
| Token issuance | `/auth/token` returns bearer for external clients (CLI, automation). |
|
||||
| Circuit expiry probe | `/auth/ping` returns 200/401, polled by `CookieAuthenticationStateProvider` to detect expiry from inside a SignalR circuit. |
|
||||
| Failure mode | LDAP unreachable → new logins fail, active sessions continue. |
|
||||
|
||||
### Data Protection keys
|
||||
|
||||
`services.AddDataProtection().PersistKeysToDbContext<OtOpcUaConfigDbContext>().SetApplicationName("OtOpcUa")` — keys live in `ConfigDb` so a circuit started on Admin-node A survives if Traefik fails over to Admin-node B mid-session.
|
||||
|
||||
### SignalR hubs
|
||||
|
||||
Three existing hubs preserved (`/hubs/fleet`, `/hubs/alerts`, `/hubs/script-log`):
|
||||
|
||||
- **Today:** `FleetStatusPoller` polls SQL every 5s.
|
||||
- **New:** `FleetStatusBroadcaster` singleton receives Akka cluster events + per-node telemetry, pushes diffs via `IHubContext<FleetStatusHub>`. No polling.
|
||||
- `HubTokenService` bearer-token fallback retired — hubs are circuit-local, cookie auth flows through SignalR natively. External hub consumers use the bearer token from `/auth/token` with a `JwtBearer` authentication scheme declaration on the hub.
|
||||
|
||||
### UI → backend wiring
|
||||
|
||||
- **Reads:** Blazor components inject scoped repositories from DI and read directly from `ConfigDb`. No change from today.
|
||||
- **Writes / mutating ops:** Components inject `IAdminOperationsClient` — a thin wrapper around `ClusterSingletonProxy` to `AdminOperationsActor`. Mutations are `Ask` with a 10s timeout + correlation ID. Audit envelope built UI-side, completed singleton-side.
|
||||
- **Driver diagnostics:** Today's `DriverDiagnosticsClient` HTTP round-trip retires. UI components ask `IFleetDiagnosticsClient` which delegates to `ClusterClientReceptionist`-published actor messages.
|
||||
|
||||
### Health endpoints
|
||||
|
||||
| Endpoint | Returns | Used by |
|
||||
|---|---|---|
|
||||
| `/health/ready` | 200 once Akka member is `Up` + ConfigDb reachable + DataProtection key ring loaded | Service supervisor readiness gate |
|
||||
| `/health/active` | 200 only on the Admin-role leader; 503 elsewhere | Traefik — routes browser traffic to leader |
|
||||
| `/healthz` (existing) | 200 when Driver-role actor system is up + at least one driver registered (preserved on `:4841`) | Ops probes, OPC UA monitoring tools |
|
||||
|
||||
### Traefik
|
||||
|
||||
Windows Service (or external box). One route: `host=otopcua.*` → load-balance to `{admin-node-a:9000, admin-node-b:9000}` with `/health/active` health check, sticky sessions disabled (DataProtection key sharing handles continuity).
|
||||
|
||||
### appsettings structure
|
||||
|
||||
Mirrors ScadaLink's per-component options pattern: `Cluster:`, `Security:`, `ConfigDb:`, `OpcUa:`, `Drivers:`, `Historian:` sections, bound to options classes owned by their respective component projects.
|
||||
|
||||
## 6. Edit + Deploy flow (replaces draft/publish generations)
|
||||
|
||||
The single most consequential domain change: **drop the draft/publish `ConfigGeneration` lifecycle**. Edits are live; deploy is a snapshot+push, ScadaLink-style.
|
||||
|
||||
### Edit model
|
||||
|
||||
- `Equipment`, `Driver`, `DriverInstance`, `Namespace`, `UnsItem`, `Script`, `VirtualTag`, `ScriptedAlarm`, `NodeAcl` are edited **directly** via `AdminOperationsActor`. No draft staging, no `ConfigGeneration` lifecycle. Last-write-wins per row (rowversion column for stale-write detection only).
|
||||
- Live edits do **not** affect running Driver-role nodes — running stacks reflect the *last-deployed* state. The UI shows a "drift" indicator when live ConfigDb state differs from last sealed deployment.
|
||||
- Validation runs on edit (semantic checks: driver tag-path validity, script syntax, namespace name uniqueness) — pulled forward from deploy-time to edit-time.
|
||||
|
||||
### Deploy model
|
||||
|
||||
```
|
||||
Admin UI "Deploy" → AdminOperationsActor.Ask(StartDeployment)
|
||||
AdminOperationsActor:
|
||||
→ snapshot ConfigDb current state
|
||||
→ ConfigComposer.Flatten() → DeploymentArtifact
|
||||
→ compute RevisionHash = SHA256(canonical-serialized artifact)
|
||||
→ write Deployment row (DeploymentId GUID, RevisionHash, CreatedBy, CreatedAtUtc, Status=Dispatching)
|
||||
→ Ask ConfigPublishCoordinator.DispatchDeployment(deploymentId)
|
||||
|
||||
ConfigPublishCoordinator (cluster singleton, admin role):
|
||||
→ write Deployment.Status = Dispatching
|
||||
→ DistributedPubSub Publish to "deployments" topic: DispatchDeployment(deploymentId, revisionHash)
|
||||
→ schedule ApplyDeadline timer (ApplyMaxDuration, default 10 min)
|
||||
|
||||
DriverHostActor (per node, subscribed to "deployments"):
|
||||
receive DispatchDeployment(deploymentId, revisionHash):
|
||||
→ if currentDeploymentRevision == revisionHash → ack Applied (idempotent)
|
||||
→ else:
|
||||
→ acquire per-node ApplyLock (Become Applying(deploymentId))
|
||||
→ write NodeDeploymentState row (NodeId, DeploymentId, StartedAtUtc)
|
||||
→ fetch artifact: read DeploymentArtifact blob from ConfigDb by deploymentId
|
||||
→ diff against current applied artifact → per-instance ApplyDelta plans
|
||||
→ dispatch ApplyDelta to DriverInstanceActor / VirtualTagActor / ScriptedAlarmActor children
|
||||
→ collect per-instance acks (all-or-nothing per node)
|
||||
→ on full success: write GenerationSealedCache (LiteDb local), update NodeDeploymentState.AppliedAtUtc
|
||||
→ on any instance Failure: rollback to previous deployment, mark NodeDeploymentState=Failed
|
||||
→ Tell Coordinator: ApplyAck(deploymentId, nodeId, Applied | Failed(reason))
|
||||
→ Become Steady
|
||||
|
||||
ConfigPublishCoordinator: collect ApplyAcks
|
||||
→ all Driver nodes Applied → Deployment.Status = Sealed → DistributedPubSub PublishDeploymentSealed
|
||||
→ any Failed → Deployment.Status = PartiallyFailed → broadcast DeploymentFailed
|
||||
→ deadline elapsed before all acks → Deployment.Status = TimedOut → broadcast DeploymentTimedOut
|
||||
```
|
||||
|
||||
### Per-instance operation lock
|
||||
|
||||
All mutating commands (deploy, disable, enable, delete) on a `DriverInstance` go through `DriverInstanceActor`, which serializes them via the actor mailbox — single-threaded by construction.
|
||||
|
||||
### Idempotency
|
||||
|
||||
- `DeploymentId` + `RevisionHash` together identify a deployment.
|
||||
- `DriverHostActor` seeing a `DispatchDeployment` whose `RevisionHash` matches current applied state → immediate ack `Applied`, no work. Safe to redeliver.
|
||||
- `Phase7Composer.ComposeAsync(artifact)` is pure; same artifact → same delta plan.
|
||||
- `DriverInstanceActor.ApplyDelta(plan)` compares against current state, applies only diffs.
|
||||
|
||||
### Concurrency control
|
||||
|
||||
- Last-write-wins on edits (no optimistic concurrency on `Equipment`, `Driver`, `Script`, etc.) — matches ScadaLink template behavior.
|
||||
- **Optimistic concurrency on `Deployment` and `NodeDeploymentState` rows** (rowversion column) — prevents two concurrent Coordinator instances (during failover) from corrupting state.
|
||||
|
||||
### Singleton failover during deploy
|
||||
|
||||
1. Old Coordinator wrote `Deployment.Status = Dispatching` + `NodeDeploymentState` rows before broadcast.
|
||||
2. New Coordinator on takeover queries `Deployment` rows with non-terminal `Status`.
|
||||
3. For each in-flight deployment, `Ask` every `DriverHostActor` (via cluster-aware actor selection) for current `NodeDeploymentState`.
|
||||
4. Recompute outstanding-ack set; resume the deadline timer with the remaining time.
|
||||
5. If apply deadline already passed → mark `Deployment.Status = TimedOut` for any unack'd nodes.
|
||||
|
||||
### Crash recovery on Driver node restart
|
||||
|
||||
- `DriverHostActor.PreStart` reads `NodeDeploymentState` for self.
|
||||
- If row says `Applied` for some `DeploymentId` and matches last sealed cache → Become Steady on that artifact.
|
||||
- If row says `Applying` (didn't reach Applied) → discard partial state, re-fetch the artifact, replay apply (idempotent).
|
||||
- If ConfigDb unreachable → fall back to local LiteDb sealed cache, Become `Stale` (drops ServiceLevel via `RedundancyStateActor`). Background reconnect retries every 30s.
|
||||
|
||||
### Schema migration from today
|
||||
|
||||
| Today | New |
|
||||
|---|---|
|
||||
| `ConfigGeneration` (Draft/Published/Sealed lifecycle) | **Dropped** |
|
||||
| `ClusterNodeGenerationState` | Renamed → `NodeDeploymentState` with `(NodeId, DeploymentId, Status, StartedAtUtc, AppliedAtUtc, RowVersion)` |
|
||||
| `ClusterNode.RedundancyRole` column | **Dropped** (Akka leader-of-driver-role is source of truth) |
|
||||
| `ConfigAuditLog` | Kept; deploy events added as new event types |
|
||||
| (new) `Deployment` | `(DeploymentId, RevisionHash, Status, CreatedBy, CreatedAtUtc, ArtifactBlob varbinary(max), RowVersion)` |
|
||||
| (new) `ConfigEdit` audit row per Equipment/Driver/Script edit | Live-edit history |
|
||||
| (new) `DataProtectionKeys` | DataProtection key ring storage |
|
||||
|
||||
No more `ApplyLeaseRegistry` table or watchdog actor. Apply state lives in `NodeDeploymentState`; watchdog is a Coordinator-side scheduled message keyed by `DeploymentId`.
|
||||
|
||||
### Stale-config fallback
|
||||
|
||||
Preserved from today's `GenerationSealedCache`: local LiteDb cache holds last-applied `DeploymentArtifact`. On Host boot with ConfigDb unreachable, `DriverHostActor` boots from cache → Become `Stale` → `RedundancyStateActor` drops `ServiceLevel` for that node.
|
||||
|
||||
### Peer probes consolidated
|
||||
|
||||
| Today | New |
|
||||
|---|---|
|
||||
| `PeerHttpProbeLoop` (HTTP `/healthz`) | Retired — Akka failure detector replaces it |
|
||||
| `PeerUaProbeLoop` (OPC UA `opc.tcp://peer:4840`) | **Retained** as `PeerOpcUaProbeActor` — tests whether the OPC UA stack itself (not just the process) is up. Feeds `RedundancyStateActor`. |
|
||||
| `DbHealthCache` (cached DB probe) | Retained as `DbHealthProbeActor` per-node. Feeds `RedundancyStateActor` + `/health/ready`. |
|
||||
|
||||
### ServiceLevel computation in `RedundancyStateActor`
|
||||
|
||||
```
|
||||
serviceLevel(node) =
|
||||
base 240 if (cluster member Up AND db reachable AND not stale AND opc ua probe ok)
|
||||
base 200 if (member Up AND db reachable AND stale)
|
||||
base 100 if (member Up AND db unreachable AND stale)
|
||||
base 0 if (member Down / Unreachable)
|
||||
|
||||
+10 bonus if Akka driver-role leader is this node
|
||||
```
|
||||
|
||||
ServiceLevel bands match the existing `RedundancyStatePublisher` so OPC UA client behavior is unchanged from today. The leader-bonus replaces today's operator-managed `RedundancyRole = Primary`.
|
||||
|
||||
## 7. Error handling & failure modes
|
||||
|
||||
### Akka cluster failure modes
|
||||
|
||||
| Scenario | Behavior |
|
||||
|---|---|
|
||||
| Network partition (split-brain) | Keep-oldest resolver downs the smaller side after 15s stable-after. `down-if-alone = on` covers isolated nodes. |
|
||||
| Admin leader process crash | Failure detector trips after 10s, downs the member, new singleton instance starts on remaining Admin node. Traefik `/health/active` probe fails over within 1 polling interval (~5s). |
|
||||
| Driver-role node crash | RedundancyStateActor sees member Down → drops that node's ServiceLevel to 0 → OPC UA clients reconnect to surviving node. Both nodes were already running their own copy; no in-cluster recovery needed for that node's work. |
|
||||
| Both Admin nodes down simultaneously | Web UI unavailable. Driver nodes continue serving OPC UA from last-sealed cache. No new deployments possible until Admin node recovers. |
|
||||
| All Driver nodes down | OPC UA endpoints unavailable. Clients reconnect when any Driver node returns. ServiceLevel back to 240 once member Up + DB reachable + apply sealed. |
|
||||
| Singleton handover during deploy | Coordinator state survives in `Deployment` + `NodeDeploymentState` ConfigDb rows. New Coordinator queries DriverHostActors via cluster-aware actor selection. Resume remaining deadline. |
|
||||
|
||||
### ConfigDb unavailability
|
||||
|
||||
- **At edit time:** AdminUI returns user-visible error. No retries — operator decides.
|
||||
- **At deploy time:** Coordinator refuses to start dispatch if it can't write the `Deployment` row.
|
||||
- **At Driver node boot:** Fall back to local LiteDb sealed cache. RedundancyStateActor drops `ServiceLevel`.
|
||||
- **At singleton failover:** New Coordinator's `PreStart` retries via Polly (5 attempts, exponential backoff). If exhausted → singleton crashes → cluster restarts singleton on next viable Admin node.
|
||||
|
||||
### Driver / equipment failures
|
||||
|
||||
- Driver connection loss → `DriverInstanceActor` enters `Reconnecting` Become state, publishes bad-quality to OPC UA address space immediately, retries at fixed interval.
|
||||
- Tag-path-resolution failure → retried periodically.
|
||||
- Write failure to driver → returned synchronously to caller via `Ask` from `OpcUaPublishActor`.
|
||||
- Driver process unresponsive (Galaxy gateway down) → `IDriver.HealthCheck` returns degraded → `DriverInstanceActor` reports to `DriverHostActor` → `RedundancyStateActor` factors into ServiceLevel.
|
||||
|
||||
### Wonderware historian sidecar
|
||||
|
||||
- Named-pipe disconnect → `HistorianAdapterActor` enters `Reconnecting`; alarm history rows buffered to local SQLite store-and-forward.
|
||||
- Sidecar process crash → no in-cluster recovery (external process); operator restarts via Windows Service control.
|
||||
|
||||
### Auth failures
|
||||
|
||||
- LDAP unreachable → `/auth/login` returns 503. Active sessions continue with cached claims.
|
||||
- JWT signature failure (key ring drift) → 401, session terminates. DataProtection keys in ConfigDb prevent this in the happy path.
|
||||
- Cookie expired (sliding 30-min idle) → `/auth/ping` returns 401 → `CookieAuthenticationStateProvider` triggers UI logout.
|
||||
|
||||
### SignalR / circuit drops
|
||||
|
||||
- Blazor circuit dropped → `App.razor` reload script reconnects (preserved from today).
|
||||
- Hub message loss during reconnect → `FleetStatusBroadcaster` resends current state to the reconnecting client on `OnConnectedAsync` (full snapshot, not just diffs).
|
||||
|
||||
### OPC UA stack failures
|
||||
|
||||
- Address-space corruption → `OpcUaPublishActor` logs ERROR, sends `RebuildAddressSpace` to itself; sequence number bump notifies clients to resubscribe.
|
||||
- OPC UA listener bind failure (port collision) → Host fails readiness probe, supervisor restarts service.
|
||||
|
||||
### Audit invariants
|
||||
|
||||
- Audit write failures **never abort** the user-facing action. `AuditWriterActor` buffer overflow → log WARN, drop oldest (with counter metric). The action's success/failure path is authoritative.
|
||||
- All deploy + edit events carry `ExecutionId` (per-request correlation) so audit rows for one operator action share an ID.
|
||||
|
||||
## 8. Testing strategy
|
||||
|
||||
Test projects mirror the new layering. Test infrastructure stays Mac-friendly: stubbed Windows-only drivers, ephemeral SQL Server (LocalDB on Windows / `mcr.microsoft.com/mssql/server` container on Mac), `OpenLDAP` container, all spun up via `tests/docker-compose.yml`.
|
||||
|
||||
### Layered test pyramid
|
||||
|
||||
| Layer | Project | What it covers |
|
||||
|---|---|---|
|
||||
| **Unit** | `OtOpcUa.Runtime.Tests` | Per-actor logic via `Akka.TestKit.Xunit2`. `DriverInstanceActor` state-machine transitions, `Phase7Composer` purity, `ScriptedAlarmActor` state machine, `VirtualTagActor` expression eval. Drivers mocked via `IDriver` test doubles. |
|
||||
| **Unit** | `OtOpcUa.ControlPlane.Tests` | Singleton actor logic. `ConfigPublishCoordinator` happy path + timeout + concurrent ack ordering. `RedundancyStateActor` ServiceLevel computation truth table. `AuditWriterActor` batch flush + idempotency on duplicate `EventId`. |
|
||||
| **Unit** | `OtOpcUa.Cluster.Tests` | Split-brain resolver config validation, role-aware membership helpers, HOCON parses. |
|
||||
| **Unit** | `OtOpcUa.Security.Tests` | LDAP role mapping, JWT issuance, cookie+JWT roundtrip, `/auth/ping` expiry semantics. |
|
||||
| **Integration** | `OtOpcUa.Host.IntegrationTests` | 2-node in-process Akka cluster. Real SQL Server, stubbed drivers. Tests: deploy happy path, deploy timeout, deploy with one node down, singleton failover mid-deploy, ConfigDb outage + stale-config fallback, edit-then-deploy roundtrip, audit row emission. |
|
||||
| **Integration** | `OtOpcUa.OpcUa.IntegrationTests` | Real OPCFoundation client connects to a running stubbed Host. Asserts: dual endpoint visible, ServerUriArray populated, ServiceLevel reflects leader status, browse + read + write through `OpcUaPublishActor`, write failures returned synchronously. |
|
||||
| **End-to-end** | `OtOpcUa.E2E.Tests` | Full Host with Traefik in front, two Admin nodes + two Driver nodes (4 processes via Docker). Verifies: web UI login via LDAP, deploy from UI flows to OPC UA stack, kill admin leader → Traefik fails over within 25s, kill driver node → OPC UA clients reconnect with correct ServiceLevel. CI nightly. |
|
||||
|
||||
### Failover-specific test cases
|
||||
|
||||
1. Kill Admin leader during `Dispatching` phase → new Coordinator resumes, deployment seals.
|
||||
2. Kill Admin leader during `AwaitingApplyAcks` → new Coordinator queries DriverHostActors, completes ack collection.
|
||||
3. Kill Driver node during `Applying` → Coordinator marks that node's `NodeDeploymentState=Failed` after deadline; surviving Driver nodes complete their apply.
|
||||
4. Restart Driver node mid-deploy → on restart, replays apply (idempotent).
|
||||
5. Akka split-brain (network partition between 2 admin nodes) → keep-oldest wins, smaller side downs itself within 15s.
|
||||
6. Both Admin nodes restart simultaneously → deployments in `Dispatching` resume cleanly after cluster reforms.
|
||||
7. Concurrent edits to the same `DriverInstance` from two UI sessions → last write wins, both audit rows present, no row corruption.
|
||||
|
||||
### Deploy idempotency tests
|
||||
|
||||
- Replay `DispatchDeployment` with same `DeploymentId/RevisionHash` → no work, ack `Applied`.
|
||||
- Apply same `DeploymentArtifact` twice in a row → second application is a no-op.
|
||||
- Crash DriverHostActor mid-apply, restart → resumes from `NodeDeploymentState`, completes idempotently.
|
||||
|
||||
### Property tests
|
||||
|
||||
- `Phase7Composer.ComposeAsync` is pure: same artifact → same plan, no side effects.
|
||||
- `RedundancyStateActor` ServiceLevel computation: every combination of (member-state, db-ok, stale, opc-ok, is-leader) produces expected byte.
|
||||
- Audit envelope generation: every mutating op produces exactly one audit row with stable `ExecutionId` correlation.
|
||||
|
||||
### Mac-dev test invariants
|
||||
|
||||
- All unit + integration tests run on macOS without Windows-only assemblies.
|
||||
- Cluster tests use in-process Akka.Remote on 127.0.0.1.
|
||||
- LDAP tests use `OpenLDAP` container or `Security:Ldap:DevStubMode=true`.
|
||||
|
||||
### Retired tests
|
||||
|
||||
Anything touching `ConfigGeneration` lifecycle, `ApplyLeaseRegistry`, `PeerHttpProbeLoop`, `FleetStatusPoller`, `RedundancyCoordinator` peer-probe loops, `RedundancyStatePublisher`.
|
||||
|
||||
## 9. Risks & open questions
|
||||
|
||||
1. **Akka.NET on .NET 10.** Verify Akka.NET 1.5+ targets .NET 10 cleanly.
|
||||
2. **OPCFoundation SDK threading.** The OPC UA stack runs its own threadpool. `OpcUaPublishActor` must marshal writes via thread-safe wrappers; use a dedicated `synchronized-dispatcher` for actors that touch the OPC UA address space.
|
||||
3. **Failure detector tuning.** ScadaLink's 2s/10s is tuned for site-to-central RTT. Benchmark before locking. Aggressive tuning + GC pauses → spurious singleton handover.
|
||||
4. **ServiceLevel = Akka leader removes operator control.** No escape hatch in v1. If a customer needs one later, add a `PinnedPrimary` column to `ClusterNode` and an override path in `RedundancyStateActor`. Out of scope now.
|
||||
5. **Long-lived v2 branch drift.** Monthly rebase from main, CI runs on v2 from day one.
|
||||
6. **Schema migration is destructive.** Dropping `ConfigGeneration` + `ClusterNode.RedundancyRole` is one-way. Cutover must run on a quiesced system. Provide a `Migrate-To-V2.ps1` script that backs up ConfigDb, runs EF migrations, validates row counts, prints a summary.
|
||||
7. **Wonderware + mxaccessgw still external processes.** Both untouched by this refactor. Future actorization would be a second refactor.
|
||||
8. **Audit row volume.** Edit-heavy install ≈ 5k rows/day. Need monthly partition + 365-day retention same as ScadaLink #23.
|
||||
|
||||
## 10. Migration plan
|
||||
|
||||
Big-bang on `v2-akka-fuse` branch:
|
||||
|
||||
1. Branch `v2-akka-fuse` off `main`.
|
||||
2. Add new projects: `OtOpcUa.Host`, `.Cluster`, `.Security`, `.ControlPlane`, `.Runtime`, `.ConfigDb`, `.Commons`, `.AdminUI`, `.OpcUaServer`. Convert to `OtOpcUa.slnx`.
|
||||
3. Move ConfigDb access (EF context, repos, migrations) out of `Server` and `Admin` into `OtOpcUa.ConfigDb`. Add DataProtection key store table.
|
||||
4. Move LDAP + cookie + JWT out of `Admin/Security` into `OtOpcUa.Security`. Adopt 15-min JWT / 30-min sliding cookie / `/auth/ping`.
|
||||
5. Build `OtOpcUa.Cluster`: HOCON, `AkkaHostedService`, role-aware membership helpers, split-brain resolver.
|
||||
6. Build `OtOpcUa.ControlPlane`: `ConfigPublishCoordinator`, `AdminOperationsActor`, `AuditWriterActor`, `FleetStatusBroadcaster`, `RedundancyStateActor`.
|
||||
7. Build `OtOpcUa.Runtime`: `DriverHostActor`, `DriverInstanceActor`, `VirtualTagActor`, `ScriptedAlarmActor`, `OpcUaPublishActor`, `HistorianAdapterActor`, `PeerOpcUaProbeActor`, `DbHealthProbeActor`.
|
||||
8. Migrate `Phase7Composer` to `OtOpcUa.OpcUaServer`; make it pure and unit-tested.
|
||||
9. Move Blazor components from `Admin` into `OtOpcUa.AdminUI` library; replace `DriverDiagnosticsClient` HTTP with in-process actor calls; rewire `FleetStatusHub` / `AlertHub` / `ScriptLogHub` to be fed by `FleetStatusBroadcaster` `IHubContext`.
|
||||
10. Build `OtOpcUa.Host` `Program.cs`: role-gated startup, health endpoints (`/health/ready`, `/health/active`, `/healthz`), `AddWindowsService`.
|
||||
11. ConfigDb migration: add `Deployment`, `ConfigEdit`, `DataProtectionKeys` tables; rename `ClusterNodeGenerationState` → `NodeDeploymentState`; drop `ConfigGeneration`; drop `ClusterNode.RedundancyRole`. EF migration + idempotent SQL script + `Migrate-To-V2.ps1`.
|
||||
12. Delete `OtOpcUa.Server`, `OtOpcUa.Admin`, `DriverInstanceBootstrapper`, `RedundancyCoordinator`, `RedundancyStatePublisher`, `ApplyLeaseRegistry`, `FleetStatusPoller`, `PeerHttpProbeLoop`, `HubTokenService`. Sweep any `*RedundancyRole*` references.
|
||||
13. Update `deploy/Install-Services.ps1`: single Windows Service per node, role via env var, Traefik service registration.
|
||||
14. Update docs in `docs/`: rewrite `Redundancy.md`, `ServiceHosting.md`; add `Cluster.md`, `ControlPlane.md`, `Runtime.md`. Add top-level `Architecture-v2.md` summary.
|
||||
15. CI: add integration test job for the 2-node cluster + OPC UA roundtrip.
|
||||
16. Tag the last v1 release on `main` for backport-only fixes. Merge `v2-akka-fuse` → `main` when GA.
|
||||
@@ -0,0 +1,716 @@
|
||||
# Akka Hosting Alignment — Gap Closeout Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use `superpowers-extended-cc:executing-plans` to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Close the four real/cosmetic gaps identified by the audit of `docs/plans/2026-05-26-akka-hosting-alignment-plan.md` so the v2 implementation matches the plan's literal contract (per-role appsettings overlays, explicit dual-endpoint visibility test, plan-prescribed filenames, removal of empty legacy directories).
|
||||
|
||||
**Architecture:** Additive only. No production-runtime semantics change. One small extension to `OpcUaApplicationHost` so the OPC UA server can advertise peer URIs in `Server.ServerArray` — gated on a new option, defaults to old behavior. Everything else is JSON, test code, file moves, and `rm -rf` of stale bin/obj trees.
|
||||
|
||||
**Tech Stack:** .NET 10, OPCFoundation .NET Standard SDK (`Opc.Ua.*`), xunit.v3, Shouldly, EF Core 10 (inherited; no schema changes).
|
||||
|
||||
**Source plan:** `docs/plans/2026-05-26-akka-hosting-alignment-plan.md`. The audit findings closed by this plan map to Tasks 54, 59, 60, and the post-Task-56 cosmetic cleanup. **Read the source plan's "Conventions for every task" block — those rules still apply here.**
|
||||
|
||||
**Branch:** `v2-gap-closeout` off `master`.
|
||||
|
||||
---
|
||||
|
||||
## Conventions for every task
|
||||
|
||||
- **Branch:** Stay on `v2-gap-closeout`. Never commit to `master` while plan is running.
|
||||
- **Build command:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — must be green before commit.
|
||||
- **Test command:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build` — relevant new/changed tests must pass.
|
||||
- **Commit format:** Conventional Commits matching the source plan — `feat(host):`, `test(opcua):`, `chore(cleanup):`, `refactor(test):`, etc.
|
||||
- **Mac compatibility:** All code must build on macOS. The new dual-endpoint test boots two real OPC UA servers on loopback — works on macOS (no Windows-only APIs needed; PKI is created under a per-test temp dir).
|
||||
|
||||
---
|
||||
|
||||
## Task 0: Add three role-overlay appsettings files (Task 54 gap)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1, Task 5, Task 6
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin.json`
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.driver.json`
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin-driver.json`
|
||||
|
||||
**Background:**
|
||||
`Program.cs` line 33-35 loads `appsettings.{role-suffix}.json` where the suffix is the roles joined alphabetically with `'-'`. Today the loader passes `optional: true`, so the host boots without these files — but the source plan (Task 54) called them out as required scaffolding so operators have per-role tunable defaults.
|
||||
|
||||
Suffix matrix:
|
||||
| `OTOPCUA_ROLES` env | Loaded file |
|
||||
|---|---|
|
||||
| `admin` | `appsettings.admin.json` |
|
||||
| `driver` | `appsettings.driver.json` |
|
||||
| `admin,driver` (any order) | `appsettings.admin-driver.json` (joined alphabetical) |
|
||||
|
||||
**Step 1: Create `appsettings.admin.json`**
|
||||
|
||||
Admin-only nodes don't bind drivers; tighten Serilog and disable the LDAP dev stub by default.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Create `appsettings.driver.json`**
|
||||
|
||||
Driver-only nodes have no Admin UI; raise OPC UA verbosity slightly so per-node diagnostics flow to logs.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Opc.Ua": "Debug",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 3: Create `appsettings.admin-driver.json`**
|
||||
|
||||
Combined-role nodes (the docker-dev compose default + the integration test harness) — turn on both surfaces with shared defaults.
|
||||
|
||||
```json
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Opc.Ua": "Information",
|
||||
"Akka": "Information"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Security": {
|
||||
"Ldap": {
|
||||
"DevStubMode": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: succeeds. (JSON files do not break the build; this is a smoke check that nothing else regressed.)
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin.json \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.driver.json \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.admin-driver.json
|
||||
git commit -m "feat(host): add per-role appsettings overlays for admin/driver/admin-driver"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Extend `OpcUaApplicationHost` with `PeerApplicationUris` + populate `Server.ServerArray`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 0, Task 5, Task 6
|
||||
|
||||
**Files:**
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs` (add option + post-start population)
|
||||
- Test: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs`
|
||||
|
||||
**Background:**
|
||||
The source plan's Task 60 promised a test where "real OPCFoundation client → both endpoints visible in ServerUriArray". That requires production code to populate the peer URIs onto each server's `Server.ServerArray` (NodeId i=2254) property. No such code exists in v2 today — this task adds it as an opt-in option so existing single-node tests keep their current behavior. Task 3 then writes the integration test that drives it across two servers.
|
||||
|
||||
**Step 1: Write the failing unit test**
|
||||
|
||||
Create `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs`:
|
||||
|
||||
```csharp
|
||||
using System.IO;
|
||||
using System.Net.Sockets;
|
||||
using System.Net;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Server;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Audit gap closeout — verifies <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/>
|
||||
/// is reflected in <c>Server.ServerArray</c> after start. Single-server in-process check; the
|
||||
/// cross-server visibility check lives in <c>OtOpcUa.OpcUaServer.IntegrationTests</c>.
|
||||
/// </summary>
|
||||
public sealed class OpcUaApplicationHostServerArrayTests
|
||||
{
|
||||
[Fact]
|
||||
public async Task ServerArray_contains_local_uri_and_configured_peers_after_start()
|
||||
{
|
||||
var pkiRoot = Path.Combine(Path.GetTempPath(), $"otopcua-pki-{Guid.NewGuid():N}");
|
||||
try
|
||||
{
|
||||
var options = new OpcUaApplicationHostOptions
|
||||
{
|
||||
ApplicationName = "OtOpcUa.UnitTest",
|
||||
ApplicationUri = "urn:OtOpcUa.UnitTest.NodeA",
|
||||
OpcUaPort = AllocateFreePort(),
|
||||
PublicHostname = "127.0.0.1",
|
||||
PkiStoreRoot = pkiRoot,
|
||||
PeerApplicationUris = new[] { "urn:OtOpcUa.UnitTest.NodeB" },
|
||||
};
|
||||
|
||||
var server = new StandardServer();
|
||||
await using var host = new OpcUaApplicationHost(options, NullLogger<OpcUaApplicationHost>.Instance);
|
||||
await host.StartAsync(server, CancellationToken.None);
|
||||
|
||||
var serverArray = server.CurrentInstance.ServerObject.ServerArray.Value;
|
||||
serverArray.ShouldNotBeNull();
|
||||
serverArray.ShouldContain("urn:OtOpcUa.UnitTest.NodeA");
|
||||
serverArray.ShouldContain("urn:OtOpcUa.UnitTest.NodeB");
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (Directory.Exists(pkiRoot)) Directory.Delete(pkiRoot, recursive: true);
|
||||
}
|
||||
}
|
||||
|
||||
private static int AllocateFreePort()
|
||||
{
|
||||
var listener = new TcpListener(IPAddress.Loopback, 0);
|
||||
listener.Start();
|
||||
var port = ((IPEndPoint)listener.LocalEndpoint).Port;
|
||||
listener.Stop();
|
||||
return port;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the test — confirm it fails**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests --filter "FullyQualifiedName~OpcUaApplicationHostServerArrayTests"`
|
||||
Expected: FAIL with `PeerApplicationUris` not found (compile error) — the option doesn't exist yet.
|
||||
|
||||
**Step 3: Add the option**
|
||||
|
||||
Edit `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs`. Add to `OpcUaApplicationHostOptions` (after `AutoAcceptUntrustedClientCertificates`, around line 65):
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Peer server URIs published in <c>Server.ServerArray</c> after start, in addition to
|
||||
/// the local <see cref="ApplicationUri"/>. Empty by default — set this on warm-redundancy
|
||||
/// deployments so OPC UA clients can discover the partner endpoint via the standard
|
||||
/// Server.ServerArray property (NodeId i=2254). Order does not matter; the local URI
|
||||
/// is always element 0.
|
||||
/// </summary>
|
||||
public IList<string> PeerApplicationUris { get; set; } = new List<string>();
|
||||
```
|
||||
|
||||
**Step 4: Populate `Server.ServerArray` after start**
|
||||
|
||||
Edit `OpcUaApplicationHost.StartAsync` (around line 100-118). After the `_application.Start(server)` call and before the log line, insert:
|
||||
|
||||
```csharp
|
||||
PopulateServerArray();
|
||||
```
|
||||
|
||||
Then add the private method below `AttachUserAuthenticator`:
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Writes the union of <see cref="OpcUaApplicationHostOptions.ApplicationUri"/> and
|
||||
/// <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/> to the OPC UA standard
|
||||
/// <c>Server.ServerArray</c> property (NodeId i=2254). Clients in a warm-redundancy
|
||||
/// deployment discover the partner endpoint by reading this property.
|
||||
/// </summary>
|
||||
private void PopulateServerArray()
|
||||
{
|
||||
var serverObject = _server?.CurrentInstance?.ServerObject;
|
||||
if (serverObject is null) return;
|
||||
|
||||
var uris = new List<string> { _options.ApplicationUri };
|
||||
foreach (var peer in _options.PeerApplicationUris)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(peer) && !uris.Contains(peer))
|
||||
uris.Add(peer);
|
||||
}
|
||||
serverObject.ServerArray.Value = uris.ToArray();
|
||||
}
|
||||
```
|
||||
|
||||
**Step 5: Run the test — confirm it passes**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests --filter "FullyQualifiedName~OpcUaApplicationHostServerArrayTests"`
|
||||
Expected: PASS. If `ServerObject.ServerArray.Value` is read-only (some SDK versions guard it), fall back to writing through `ServerArrayNode.Value` via the address-space accessor — but try the direct write first; the SDK exposes it as a settable BaseDataVariableState on `ServerObjectState`.
|
||||
|
||||
**Step 6: Run full OpcUaServer.Tests suite to confirm no regression**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests`
|
||||
Expected: all tests pass — `PopulateServerArray` is additive when `PeerApplicationUris` is empty (default), so existing tests don't change behavior.
|
||||
|
||||
**Step 7: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OpcUaApplicationHost.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/OpcUaApplicationHostServerArrayTests.cs
|
||||
git commit -m "feat(opcua): OpcUaApplicationHost publishes peer URIs in Server.ServerArray"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Create `OtOpcUa.OpcUaServer.IntegrationTests` project
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 5, Task 6 (file moves elsewhere)
|
||||
**Depends on:** none (csproj is self-contained)
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj`
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/ZB.MOM.WW.OtOpcUa.slnx` (add the project)
|
||||
|
||||
**Background:**
|
||||
The source plan's Task 60 named this exact project. Audit found ServiceLevel coverage relocated to other test projects but no `OpcUaServer.IntegrationTests` project exists. Creating the project skeleton in its own task keeps Task 3's commit focused on the test code.
|
||||
|
||||
**Step 1: Create the csproj**
|
||||
|
||||
Mirror the conventions in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests.csproj`. The integration project needs the `Opc.Ua.Client` package (vs. only `Opc.Ua.Server` in the unit tests) — confirm the version against the existing client CLI's csproj: `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI/ZB.MOM.WW.OtOpcUa.Client.CLI.csproj`.
|
||||
|
||||
```xml
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<IsPackable>false</IsPackable>
|
||||
<IsTestProject>true</IsTestProject>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests</RootNamespace>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="xunit.v3"/>
|
||||
<PackageReference Include="Shouldly"/>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Client"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Configuration"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions"/>
|
||||
<PackageReference Include="xunit.runner.visualstudio">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\src\Server\ZB.MOM.WW.OtOpcUa.OpcUaServer\ZB.MOM.WW.OtOpcUa.OpcUaServer.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
```
|
||||
|
||||
If `OPCFoundation.NetStandard.Opc.Ua.Client` isn't in `Directory.Packages.props`, add it there (mirror the existing `OPCFoundation.NetStandard.Opc.Ua.Server` version exactly).
|
||||
|
||||
**Step 2: Add project to the solution**
|
||||
|
||||
Run: `dotnet sln ZB.MOM.WW.OtOpcUa.slnx add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests.csproj`
|
||||
Expected: "Project added to the solution."
|
||||
|
||||
**Step 3: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: builds. (Empty project, so no test discovery yet — `dotnet test` would say "no tests".)
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/ \
|
||||
ZB.MOM.WW.OtOpcUa.slnx \
|
||||
Directory.Packages.props # only if the Opc.Ua.Client version was added there
|
||||
git commit -m "test(opcua): scaffold OtOpcUa.OpcUaServer.IntegrationTests project"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: `DualEndpointTests` — real OPC UA client reads both URIs from `Server.ServerArray`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 5, Task 6
|
||||
**Depends on:** Task 1 (PeerApplicationUris wiring), Task 2 (IT project exists)
|
||||
|
||||
**Files:**
|
||||
- Create: `/Users/dohertj2/Desktop/OtOpcUa/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs`
|
||||
|
||||
**Background:**
|
||||
This is the explicit Task 60 deliverable: a real OPC UA client connects to one server and confirms it can discover the partner via `Server.ServerArray`. Single-server unit-side coverage exists in Task 1; this test exercises the wire path with both servers up.
|
||||
|
||||
**Step 1: Write the test**
|
||||
|
||||
```csharp
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Client;
|
||||
using Opc.Ua.Configuration;
|
||||
using Opc.Ua.Server;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests;
|
||||
|
||||
/// <summary>
|
||||
/// Source plan Task 60 — closes the audit gap. Boots two real <see cref="StandardServer"/>
|
||||
/// instances on loopback, each configured with the other's <c>ApplicationUri</c> in
|
||||
/// <see cref="OpcUaApplicationHostOptions.PeerApplicationUris"/>. A real OPC UA client connects
|
||||
/// to Node A, reads <c>Server.ServerArray</c>, and asserts both URIs are visible — the
|
||||
/// warm-redundancy discovery contract clients depend on.
|
||||
/// </summary>
|
||||
public sealed class DualEndpointTests
|
||||
{
|
||||
private const string NodeAUri = "urn:OtOpcUa.DualEndpoint.NodeA";
|
||||
private const string NodeBUri = "urn:OtOpcUa.DualEndpoint.NodeB";
|
||||
|
||||
[Fact]
|
||||
public async Task Client_reads_both_ApplicationUris_from_NodeA_ServerArray()
|
||||
{
|
||||
var pkiRootA = Path.Combine(Path.GetTempPath(), $"otopcua-pki-a-{Guid.NewGuid():N}");
|
||||
var pkiRootB = Path.Combine(Path.GetTempPath(), $"otopcua-pki-b-{Guid.NewGuid():N}");
|
||||
var portA = AllocateFreePort();
|
||||
var portB = AllocateFreePort();
|
||||
|
||||
try
|
||||
{
|
||||
await using var nodeA = await StartNodeAsync(NodeAUri, portA, pkiRootA, peers: new[] { NodeBUri });
|
||||
await using var nodeB = await StartNodeAsync(NodeBUri, portB, pkiRootB, peers: new[] { NodeAUri });
|
||||
|
||||
var serverArray = await ReadServerArrayAsync($"opc.tcp://127.0.0.1:{portA}/OtOpcUa");
|
||||
serverArray.ShouldContain(NodeAUri);
|
||||
serverArray.ShouldContain(NodeBUri);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (Directory.Exists(pkiRootA)) Directory.Delete(pkiRootA, recursive: true);
|
||||
if (Directory.Exists(pkiRootB)) Directory.Delete(pkiRootB, recursive: true);
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<OpcUaApplicationHost> StartNodeAsync(
|
||||
string applicationUri, int port, string pkiRoot, string[] peers)
|
||||
{
|
||||
var options = new OpcUaApplicationHostOptions
|
||||
{
|
||||
ApplicationName = applicationUri, // unique per node — SDK uses it for cert CN
|
||||
ApplicationUri = applicationUri,
|
||||
OpcUaPort = port,
|
||||
PublicHostname = "127.0.0.1",
|
||||
PkiStoreRoot = pkiRoot,
|
||||
EnabledSecurityProfiles = new List<OpcUaSecurityProfile> { OpcUaSecurityProfile.None },
|
||||
AutoAcceptUntrustedClientCertificates = true,
|
||||
PeerApplicationUris = peers,
|
||||
};
|
||||
var server = new StandardServer();
|
||||
var host = new OpcUaApplicationHost(options, NullLogger<OpcUaApplicationHost>.Instance);
|
||||
await host.StartAsync(server, CancellationToken.None);
|
||||
return host;
|
||||
}
|
||||
|
||||
private static async Task<string[]> ReadServerArrayAsync(string endpointUrl)
|
||||
{
|
||||
var appConfig = new ApplicationConfiguration
|
||||
{
|
||||
ApplicationName = "OtOpcUa.DualEndpointClient",
|
||||
ApplicationUri = $"urn:OtOpcUa.DualEndpointClient.{Guid.NewGuid():N}",
|
||||
ApplicationType = ApplicationType.Client,
|
||||
SecurityConfiguration = new SecurityConfiguration
|
||||
{
|
||||
ApplicationCertificate = new CertificateIdentifier(),
|
||||
AutoAcceptUntrustedCertificates = true,
|
||||
},
|
||||
ClientConfiguration = new ClientConfiguration { DefaultSessionTimeout = 60_000 },
|
||||
CertificateValidator = new CertificateValidator(),
|
||||
};
|
||||
await appConfig.Validate(ApplicationType.Client);
|
||||
appConfig.CertificateValidator.CertificateValidation += (_, e) => e.Accept = true;
|
||||
|
||||
var endpoint = CoreClientUtils.SelectEndpoint(appConfig, endpointUrl, useSecurity: false);
|
||||
var endpointConfiguration = EndpointConfiguration.Create(appConfig);
|
||||
var configuredEndpoint = new ConfiguredEndpoint(null, endpoint, endpointConfiguration);
|
||||
|
||||
using var session = await Session.Create(
|
||||
appConfig, configuredEndpoint, updateBeforeConnect: false,
|
||||
sessionName: "DualEndpointTests", sessionTimeout: 60_000,
|
||||
identity: new UserIdentity(new AnonymousIdentityToken()),
|
||||
preferredLocales: null);
|
||||
|
||||
var value = session.ReadValue(VariableIds.Server_ServerArray);
|
||||
return (string[])value.Value;
|
||||
}
|
||||
|
||||
private static int AllocateFreePort()
|
||||
{
|
||||
var listener = new TcpListener(IPAddress.Loopback, 0);
|
||||
listener.Start();
|
||||
var port = ((IPEndPoint)listener.LocalEndpoint).Port;
|
||||
listener.Stop();
|
||||
return port;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the test**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests`
|
||||
Expected: PASS. Wall-time ~3-5 s (two cert-creation cycles + session handshake).
|
||||
|
||||
If the test hangs on the session handshake on first run, it's the SDK reading the trusted-cert store — bumping `AutoAcceptUntrustedClientCertificates = true` on both server hosts (already set above) should resolve it. If `CoreClientUtils.SelectEndpoint` throws because the SDK version uses a different overload, fall back to constructing the `EndpointDescription` directly with `EndpointUrl = endpointUrl, SecurityMode = MessageSecurityMode.None, SecurityPolicyUri = SecurityPolicies.None` and skipping `SelectEndpoint`.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/DualEndpointTests.cs
|
||||
git commit -m "test(opcua): DualEndpointTests — real client reads peer URIs from Server.ServerArray"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Wire `OtOpcUa.OpcUaServer.IntegrationTests` into v2-ci.yml
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 5, Task 6
|
||||
**Depends on:** Task 3 (project must exist + have a real test before CI runs it)
|
||||
|
||||
**Files:**
|
||||
- Modify: `/Users/dohertj2/Desktop/OtOpcUa/.github/workflows/v2-ci.yml`
|
||||
|
||||
**Step 1: Add the project to the `integration` job**
|
||||
|
||||
Either extend the existing `integration` job to run a second `dotnet test` step, or convert it to a matrix. Prefer a matrix for symmetry with `unit-tests`:
|
||||
|
||||
Open `.github/workflows/v2-ci.yml`, locate the `integration:` job. Replace it with:
|
||||
|
||||
```yaml
|
||||
integration:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
project:
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests
|
||||
- tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
- name: dotnet test ${{ matrix.project }}
|
||||
run: dotnet test ${{ matrix.project }} --configuration Release --filter "Category!=E2E"
|
||||
```
|
||||
|
||||
**Step 2: Build green check**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests --configuration Release --filter "Category!=E2E"`
|
||||
Expected: matches the exact CI command — passes locally so CI will pass too.
|
||||
|
||||
**Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add .github/workflows/v2-ci.yml
|
||||
git commit -m "ci(v2): include OpcUaServer.IntegrationTests in integration matrix"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Rename `FailoverScenarioTests` → `FailoverDuringDeployTests` (Task 59 cosmetic)
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** Task 0, Task 1, Task 2, Task 6 (different files)
|
||||
|
||||
**Files:**
|
||||
- Rename: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverScenarioTests.cs` → `FailoverDuringDeployTests.cs`
|
||||
- Modify: class name + namespace-internal references
|
||||
|
||||
**Step 1: Rename the file and the class**
|
||||
|
||||
```bash
|
||||
git mv tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverScenarioTests.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverDuringDeployTests.cs
|
||||
```
|
||||
|
||||
Then edit `FailoverDuringDeployTests.cs` and replace the single class declaration `public sealed class FailoverScenarioTests` with `public sealed class FailoverDuringDeployTests`. Use Edit, not sed — the file only declares this class once (`grep -c "FailoverScenario" .` ≤ 2).
|
||||
|
||||
**Step 2: Sweep for any stale references**
|
||||
|
||||
Run: `grep -rln "FailoverScenarioTests" .`
|
||||
Expected: zero matches after Step 1. If anything appears (e.g., a CI filter, a doc), fix the reference.
|
||||
|
||||
**Step 3: Build + run test**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests --filter "FullyQualifiedName~FailoverDuringDeployTests"`
|
||||
Expected: same tests pass that previously passed under the old name.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/FailoverDuringDeployTests.cs
|
||||
git commit -m "refactor(test): rename FailoverScenarioTests → FailoverDuringDeployTests for plan parity"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Delete empty bin/obj-only legacy directories
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~2 min
|
||||
**Parallelizable with:** Task 0, Task 1, Task 2, Task 5
|
||||
|
||||
**Files:**
|
||||
- Delete: `src/Server/ZB.MOM.WW.OtOpcUa.Server/`
|
||||
- Delete: `src/Server/ZB.MOM.WW.OtOpcUa.Admin/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/`
|
||||
- Delete: `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/`
|
||||
|
||||
**Background:**
|
||||
Source plan Task 56 deleted the projects from `ZB.MOM.WW.OtOpcUa.slnx` (confirmed by the audit) but left `bin/`+`obj/` shells on disk. These confuse new contributors and skew directory listings. None of them are referenced anywhere.
|
||||
|
||||
**Step 1: Sanity-check that each directory is bin/obj-only**
|
||||
|
||||
```bash
|
||||
for dir in \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Server \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Admin \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests; do
|
||||
echo "--- $dir ---"
|
||||
find "$dir" -maxdepth 2 -type f | grep -v "/bin/\|/obj/"
|
||||
done
|
||||
```
|
||||
|
||||
Expected: every section is empty (no source files leak out). If any source file shows, STOP and surface it — don't delete blindly.
|
||||
|
||||
**Step 2: Verify slnx doesn't reference them**
|
||||
|
||||
Run: `grep -nE 'ZB\.MOM\.WW\.OtOpcUa\.(Server|Admin)(/|\.Tests|\.E2ETests)' ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: zero matches.
|
||||
|
||||
**Step 3: Delete the directories**
|
||||
|
||||
```bash
|
||||
rm -rf src/Server/ZB.MOM.WW.OtOpcUa.Server \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Admin \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests
|
||||
```
|
||||
|
||||
**Step 4: Build green check**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: succeeds (these directories were already out of the solution).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add -A
|
||||
git commit -m "chore(cleanup): remove stale bin/obj shells for deleted v1 Server/Admin projects"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Final build + test green check
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (verification, depends on all prior tasks)
|
||||
|
||||
**Step 1: Restore + build**
|
||||
|
||||
Run: `dotnet build ZB.MOM.WW.OtOpcUa.slnx`
|
||||
Expected: 0 errors, 0 warnings (TreatWarningsAsErrors is on across the solution).
|
||||
|
||||
**Step 2: Run the full test suite**
|
||||
|
||||
Run: `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build`
|
||||
Expected: all tests green. Specifically confirm:
|
||||
- `OpcUaApplicationHostServerArrayTests` (Task 1) — pass
|
||||
- `DualEndpointTests` (Task 3) — pass
|
||||
- `FailoverDuringDeployTests` (Task 5) — same count of tests pass as before the rename
|
||||
|
||||
**Step 3: Smoke check the audit assertions**
|
||||
|
||||
Run:
|
||||
```bash
|
||||
ls src/Server/ZB.MOM.WW.OtOpcUa.Host/appsettings.*.json
|
||||
find tests/Server -iname "DualEndpointTests.cs" -o -iname "FailoverDuringDeployTests.cs"
|
||||
ls -la src/Server/ZB.MOM.WW.OtOpcUa.{Server,Admin} 2>/dev/null
|
||||
```
|
||||
|
||||
Expected:
|
||||
- 4 appsettings files: `.json`, `.Development.json`, `.admin.json`, `.admin-driver.json`, `.driver.json`
|
||||
- Both renamed/new test files exist
|
||||
- The two `ls -la` calls return errors (directories gone)
|
||||
|
||||
**Step 4: No commit unless cleanup turned up**
|
||||
|
||||
If anything failed in Steps 1-3, fix it as a follow-up task — do not paper over with a `--no-verify` commit.
|
||||
|
||||
---
|
||||
|
||||
## Final verification
|
||||
|
||||
After Task 7:
|
||||
|
||||
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` — green
|
||||
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --no-build` — green (incl. 2 new tests)
|
||||
3. `git log --oneline master..HEAD` — exactly 6 commits, Conventional-Commits style
|
||||
4. Open PR `v2-gap-closeout` → `master` titled "v2: close audit gaps — appsettings overlays, DualEndpointTests, cleanup"
|
||||
|
||||
---
|
||||
|
||||
## Task index
|
||||
|
||||
| # | Title | Class | Time | Parallel with |
|
||||
|---|---|---|---|---|
|
||||
| 0 | Per-role appsettings overlays | small | 3m | 1, 5, 6 |
|
||||
| 1 | OpcUaApplicationHost.PeerApplicationUris + ServerArray | standard | 5m | 0, 5, 6 |
|
||||
| 2 | OpcUaServer.IntegrationTests project skeleton | small | 4m | 5, 6 |
|
||||
| 3 | DualEndpointTests | standard | 5m | 5, 6 |
|
||||
| 4 | CI matrix entry for new IT project | small | 3m | 5, 6 |
|
||||
| 5 | Rename FailoverScenarioTests → FailoverDuringDeployTests | trivial | 2m | 0, 1, 2, 6 |
|
||||
| 6 | Delete stale bin/obj-only directories | trivial | 2m | 0, 1, 2, 5 |
|
||||
| 7 | Final build + test green check | trivial | 3m | none |
|
||||
|
||||
**Total estimated subagent time:** ~27 min.
|
||||
|
||||
**Dependency graph (non-parallel pairs):**
|
||||
- Task 3 depends on Task 1 (option must exist) and Task 2 (project must exist)
|
||||
- Task 4 depends on Task 3 (CI runs the project's tests)
|
||||
- Task 7 depends on all prior tasks
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-26-akka-hosting-alignment-gaps-closeout.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 0: Per-role appsettings overlays", "status": "completed", "commit": "898a477"},
|
||||
{"id": 2, "subject": "Task 1: OpcUaApplicationHost.PeerApplicationUris + ServerArray population", "status": "completed", "commits": ["70ffd28", "cb936db"]},
|
||||
{"id": 3, "subject": "Task 2: OpcUaServer.IntegrationTests project skeleton", "status": "completed", "commit": "83eda9e"},
|
||||
{"id": 4, "subject": "Task 3: DualEndpointTests — real OPC UA client reads both URIs from Server.ServerArray", "status": "completed", "commits": ["dce2528", "a5412c1", "cb936db"], "blockedBy": ["2", "3"]},
|
||||
{"id": 5, "subject": "Task 4: Wire OpcUaServer.IntegrationTests into v2-ci.yml", "status": "completed", "commit": "e8c4f18", "blockedBy": ["4"]},
|
||||
{"id": 6, "subject": "Task 5: Rename FailoverScenarioTests → FailoverDuringDeployTests", "status": "completed", "commit": "25ce111"},
|
||||
{"id": 7, "subject": "Task 6: Delete empty bin/obj-only legacy directories", "status": "completed", "commit": "(no tracked changes — bin/obj only)"},
|
||||
{"id": 8, "subject": "Task 7: Final build + test green check", "status": "completed", "blockedBy": ["1", "2", "3", "4", "5", "6", "7"]}
|
||||
],
|
||||
"lastUpdated": "2026-05-26T00:00:00Z",
|
||||
"finalReview": "approved",
|
||||
"branchHead": "e8c4f18",
|
||||
"branchCommitCount": 8
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,101 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-05-26-akka-hosting-alignment-plan.md",
|
||||
"branch": "v2-akka-fuse",
|
||||
"designDoc": "docs/plans/2026-05-26-akka-hosting-alignment-design.md",
|
||||
"lastUpdated": "2026-05-26T00:00:00Z",
|
||||
"tasks": [
|
||||
{"id": 0, "subject": "Task 0: Create branch and central package management", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [], "commit": "2b81147"},
|
||||
{"id": 1, "subject": "Task 1: Create OtOpcUa.Commons project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [2,3,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 2, "subject": "Task 2: Create OtOpcUa.Cluster project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,3,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 3, "subject": "Task 3: Create OtOpcUa.Security project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,4,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 4, "subject": "Task 4: Create OtOpcUa.ControlPlane project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,5,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 5, "subject": "Task 5: Create OtOpcUa.Runtime project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,6,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 6, "subject": "Task 6: Create OtOpcUa.OpcUaServer project", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,5,7,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 7, "subject": "Task 7: Create OtOpcUa.AdminUI Razor class library", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [1,2,3,4,5,6,8], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 8, "subject": "Task 8: Create OtOpcUa.Host Web SDK project", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [1,2,3,4,5,6,7], "blockedBy": [0], "commit": "30a2104"},
|
||||
{"id": 9, "subject": "Task 9: Build green smoke check", "status": "completed", "classification": "trivial", "estMinutes": 2, "parallelizableWith": [], "blockedBy": [1,2,3,4,5,6,7,8], "commit": "30a2104"},
|
||||
{"id": 10, "subject": "Task 10: Add Deployment entity", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [11,12,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 11, "subject": "Task 11: Add NodeDeploymentState entity", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [10,12,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 12, "subject": "Task 12: Add ConfigEdit audit entity", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [10,11,13], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": 13, "subject": "Task 13: Add DataProtection keys table", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [10,11,12], "blockedBy": [9], "commit": "8e2c4f2"},
|
||||
{"id": "14a", "subject": "Task 14a: Add RowVersion to live-edit entities", "status": "completed", "classification": "standard", "estMinutes": 10, "parallelizableWith": [], "blockedBy": [13], "commit": "4bb4ad8"},
|
||||
{"id": "14b", "subject": "Task 14b: Decouple live-edit entities from ConfigGeneration", "status": "completed", "classification": "high-risk", "estMinutes": 30, "parallelizableWith": [], "blockedBy": ["14a"], "commit": "13d3aea"},
|
||||
{"id": "14c", "subject": "Task 14c: Obsolete GenerationApplier/Diff/SealedCache", "status": "completed", "classification": "high-risk", "estMinutes": 20, "parallelizableWith": [], "blockedBy": ["14b"], "commit": "1ddf8bb"},
|
||||
{"id": "14d", "subject": "Task 14d: Drop ClusterNode.RedundancyRole", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": ["14a","14b","14c"], "blockedBy": [13], "commit": "3c915e6"},
|
||||
{"id": "14e", "subject": "Task 14e: Delete ConfigGeneration + ClusterNodeGenerationState", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [], "blockedBy": ["14b","14c"], "commit": "e00f46d"},
|
||||
{"id": "14f", "subject": "Task 14f: V2HostingAlignment EF migration (consolidator)", "status": "completed", "classification": "high-risk", "estMinutes": 15, "parallelizableWith": [], "blockedBy": ["14a","14b","14c","14d","14e"], "commit": "605dbf3"},
|
||||
{"id": 15, "subject": "Task 15: Migrate-To-V2.ps1 idempotent script", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [16,17,18], "blockedBy": ["14f"], "commit": "c168c1c"},
|
||||
{"id": 16, "subject": "Task 16: Common types (CorrelationId, ExecutionId, NodeId, ...)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [17,18], "blockedBy": [9], "commit": "fee4a8c"},
|
||||
{"id": 17, "subject": "Task 17: Akka message contracts", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [16,18], "blockedBy": [16], "commit": "5d3a5a4"},
|
||||
{"id": 18, "subject": "Task 18: Common interfaces", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [16,17], "blockedBy": [16], "commit": "136234e"},
|
||||
{"id": 19, "subject": "Task 19: HOCON config", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [20,21,22], "blockedBy": [2], "commit": "3d0f4dc"},
|
||||
{"id": 20, "subject": "Task 20: AkkaHostedService implementation", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [19,21,22], "blockedBy": [2,18], "commit": "f184f8e"},
|
||||
{"id": 21, "subject": "Task 21: Role parser from OTOPCUA_ROLES env", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [19,20,22], "blockedBy": [2], "commit": "dfb0636"},
|
||||
{"id": 22, "subject": "Task 22: ClusterRoleInfo implementation", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [19,20,21], "blockedBy": [18,20], "commit": "c217c49"},
|
||||
{"id": 23, "subject": "Task 23: Cluster test project + tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [19,20,21,22], "commit": "e0b6d56"},
|
||||
{"id": 24, "subject": "Task 24: Move LdapAuthService into OtOpcUa.Security", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [25], "blockedBy": [3], "commit": "567b8ca"},
|
||||
{"id": 25, "subject": "Task 25: JwtTokenService", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [24], "blockedBy": [3], "commit": "93316e3"},
|
||||
{"id": 26, "subject": "Task 26: Cookie+JWT hybrid AddOtOpcUaAuth extension", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [27,28], "blockedBy": [13,24,25], "commit": "207fc6a"},
|
||||
{"id": 27, "subject": "Task 27: /auth/login, /auth/ping, /auth/token endpoints", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [26,28], "blockedBy": [24,25], "commit": "8be84ba"},
|
||||
{"id": 28, "subject": "Task 28: CookieAuthenticationStateProvider for Blazor", "status": "completed", "classification": "small", "estMinutes": 4, "parallelizableWith": [26,27], "blockedBy": [25], "commit": "e38f22e"},
|
||||
{"id": 29, "subject": "Task 29: Security test project + tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [24,25,26,27,28], "commit": "38ea0c5"},
|
||||
{"id": 30, "subject": "Task 30: ConfigPublishCoordinator happy path", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [32,33,34,35], "blockedBy": [4,17,18,10,11], "commit": "62e12da"},
|
||||
{"id": 31, "subject": "Task 31: Coordinator timeout + failover recovery", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [32,33,34,35], "blockedBy": [30], "commit": "f193872"},
|
||||
{"id": 32, "subject": "Task 32: AdminOperationsActor + StartDeployment", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,33,34,35], "blockedBy": [4,17,18,10,12], "commit": "ef683f5"},
|
||||
{"id": 33, "subject": "Task 33: AuditWriterActor batched idempotent insert", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,32,34,35], "blockedBy": [4,17], "commit": "23f669c"},
|
||||
{"id": 34, "subject": "Task 34: FleetStatusBroadcaster", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [30,31,32,33,35], "blockedBy": [4,17], "commit": "dd122c4"},
|
||||
{"id": 35, "subject": "Task 35: RedundancyStateActor + ServiceLevelCalculator", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [30,31,32,33,34], "blockedBy": [4,17,18], "commit": "6b37f99"},
|
||||
{"id": 36, "subject": "Task 36: Singleton registration extension (admin role)", "status": "completed", "classification": "standard", "estMinutes": 4, "parallelizableWith": [], "blockedBy": [30,31,32,33,34,35], "commit": "52bf4b3"},
|
||||
{"id": 37, "subject": "Task 37: DriverHostActor scaffolding + PreStart recovery", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43,44], "blockedBy": [5,17,18,11], "commit": "ed13013"},
|
||||
{"id": 38, "subject": "Task 38: DriverHostActor DispatchDeployment handler", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43,44], "blockedBy": [37], "commit": "ed13013"},
|
||||
{"id": 39, "subject": "Task 39: DriverHostActor stale-config fallback", "status": "completed", "classification": "standard", "estMinutes": 4, "parallelizableWith": [41,42,43,44], "blockedBy": [38], "commit": "ed13013"},
|
||||
{"id": 40, "subject": "Task 40: Runtime test project bootstrap", "status": "completed", "classification": "small", "estMinutes": 3, "parallelizableWith": [], "blockedBy": [37,38,39], "commit": "ed13013"},
|
||||
{"id": 41, "subject": "Task 41: DriverInstanceActor state machine", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [42,43,44], "blockedBy": [5,17,40], "commit": "64c627f"},
|
||||
{"id": 42, "subject": "Task 42: VirtualTagActor", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [41,43,44], "blockedBy": [5,17,40], "commit": "39729bf"},
|
||||
{"id": 43, "subject": "Task 43: ScriptedAlarmActor", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [41,42,44], "blockedBy": [5,17,40], "commit": "95ef533"},
|
||||
{"id": 44, "subject": "Task 44: OpcUaPublishActor on synchronized dispatcher", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [41,42,43], "blockedBy": [5,6,17,19,40], "commit": "e115f13"},
|
||||
{"id": 45, "subject": "Task 45: HistorianAdapter + PeerOpcUaProbe + DbHealthProbe actors", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [37,40], "commit": "28639cb"},
|
||||
{"id": 46, "subject": "Task 46: Extract OpcUaApplicationHost + Phase7Composer", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [6], "commit": "2877a88"},
|
||||
{"id": 47, "subject": "Task 47: Phase7Composer purity + property tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [48,49,50,51,52], "blockedBy": [46], "commit": "b7c117a"},
|
||||
{"id": 48, "subject": "Task 48: Move Blazor components into AdminUI library", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [47], "blockedBy": [7], "commit": "1a067e6"},
|
||||
{"id": 49, "subject": "Task 49: Move SignalR hubs and rewire to FleetStatusBroadcaster", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [50,51,52], "blockedBy": [34,48], "commit": "26d8f2f"},
|
||||
{"id": 50, "subject": "Task 50: IAdminOperationsClient via ClusterSingletonProxy", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,51,52], "blockedBy": [18,32,48], "commit": "f022499"},
|
||||
{"id": 51, "subject": "Task 51: Replace DriverDiagnosticsClient with IFleetDiagnosticsClient", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,50,52], "blockedBy": [18,48], "commit": "b83f099"},
|
||||
{"id": 52, "subject": "Task 52: Drift indicator + Deploy button UI", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [49,50,51], "blockedBy": [50,48], "commit": "f167808"},
|
||||
{"id": 53, "subject": "Task 53: Host Program.cs role-gated startup", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [54,55], "blockedBy": [8,15,20,21,22,26,36,40,45,46,48,49], "commit": "e2b357f"},
|
||||
{"id": 54, "subject": "Task 54: Health endpoints + appsettings layout", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [53,55], "blockedBy": [8,22], "commit": "fa1d685"},
|
||||
{"id": 55, "subject": "Task 55: Mac dev mode + DEV-STUB drivers", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [53,54], "blockedBy": [41], "commit": "8b4de80"},
|
||||
{"id": 56, "subject": "Task 56: Delete OtOpcUa.Server + OtOpcUa.Admin projects", "status": "completed", "classification": "high-risk", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [53,54,55], "commit": "76310b8"},
|
||||
{"id": 57, "subject": "Task 57: Build & test green check", "status": "completed", "classification": "trivial", "estMinutes": 3, "parallelizableWith": [], "blockedBy": [56], "commit": "76310b8"},
|
||||
{"id": 58, "subject": "Task 58: 2-node integration test harness", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [57], "commit": "d6fac2d", "deviation": "Also consolidated to a single Akka.Hosting ActorSystem — Program.cs ran two competing ActorSystems (custom AkkaHostedService + Akka.Hosting AddAkka). Cluster singletons landed on the bare one. Fixed in this commit; AkkaHostedService.cs deleted. docker-compose.yml (SQL+OpenLDAP for real local runs) deferred — harness uses EF in-memory."},
|
||||
{"id": 59, "subject": "Task 59: Deploy + failover integration tests", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [60], "blockedBy": [58], "commit": "5cfbe8b", "deviation": "Happy-path + idempotency landed. Failover scenarios (kill-mid-apply, split-brain, restart-during-deploy) deferred as F22 — they need node-down/restart primitives on the harness. Two production bugs fixed in this commit: (1) coordinator missing DPS subscription for ACKs, (2) NodeId collision on shared loopback host."},
|
||||
{"id": 60, "subject": "Task 60: OPC UA dual-endpoint + ServiceLevel tests", "status": "pending", "classification": "standard", "estMinutes": 5, "parallelizableWith": [59], "blockedBy": [58]},
|
||||
{"id": 61, "subject": "Task 61: E2E test infrastructure + GitHub Actions CI", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [59,60], "commit": "253fb60", "deviation": "CI workflow files landed but E2E test project (tests/Server/ZB.MOM.WW.OtOpcUa.E2ETests) deferred — it lands when F10/F11/F12 wire enough engine for an end-to-end round-trip to be meaningful. The E2E workflow runs against the docker-dev fleet but its --filter Category=E2E currently matches zero tests."},
|
||||
{"id": 62, "subject": "Task 62: Rewrite Install-Services.ps1", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [63,64,65], "blockedBy": [53], "commit": "e40615d"},
|
||||
{"id": 63, "subject": "Task 63: Traefik config + docker-dev compose", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,64,65], "blockedBy": [53], "commit": "7e3b56c", "deviation": "Untested on macOS (no local Docker). Compose file should work — exercise + adjust on first run against a real Docker host."},
|
||||
{"id": 64, "subject": "Task 64: Update existing docs (Redundancy, ServiceHosting, security)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,63,65], "blockedBy": [57], "commit": "3c3fef9", "deviation": "Redundancy.md + ServiceHosting.md full rewrites. security.md v2 banner only — full per-section rewrite waits for F15 (Admin pages migration) since security.md references many pages that will move. README.md platform-overview updated."},
|
||||
{"id": 65, "subject": "Task 65: New v2 docs (Architecture-v2, Cluster, ControlPlane, Runtime)", "status": "completed", "classification": "standard", "estMinutes": 5, "parallelizableWith": [62,63,64], "blockedBy": [57], "commit": "1689901"},
|
||||
{"id": "F1", "subject": "Follow-up: AuthEndpoints integration tests against fused Host", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": ["F2"], "blockedBy": [53], "commit": "463512d", "origin": "Deviation from Task 29 (commit 38ea0c5) — deferred until Task 53 wires AddOtOpcUaAuth/MapOtOpcUaAuth in Program. Add WebApplicationFactory<OtOpcUa.Host.Program> tests for /auth/login (204/401/503), /auth/ping (401/200), /auth/token (200+JWT), /auth/logout (204+cookie clear) using a stub ILdapAuthService.", "deviation": "Used HostBuilder + TestServer directly (Security.Tests/AuthEndpointsIntegrationTests) instead of WebApplicationFactory<Program> — Host needs Akka cluster bootstrap that's out of scope for this contract test. Cluster-mode auth coverage belongs in Task 58."},
|
||||
{"id": "F2", "subject": "Follow-up: Replace JwtBearer BuildServiceProvider antipattern with IPostConfigureOptions", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": ["F1"], "blockedBy": [], "commit": "45a8c79", "origin": "Deviation from Task 26 (commit 207fc6a) — AddOtOpcUaAuth uses services.BuildServiceProvider().CreateScope() inside .AddJwtBearer lambda (ASP0000). Refactor to IPostConfigureOptions<JwtBearerOptions> so validation parameters resolve lazily from the real request provider."},
|
||||
{"id": "F3", "subject": "Follow-up: Add EventId unique column to ConfigAuditLog for cross-restart audit idempotency", "status": "completed", "classification": "small", "estMinutes": 15, "parallelizableWith": ["F4"], "blockedBy": [], "commit": "f57f61d", "origin": "Deviation from Task 33 — AuditWriterActor only dedups in-buffer; ConfigAuditLog lacks EventId column so a duplicate AuditEvent that arrives after a flush becomes a duplicate row. Add nullable EventId Guid + filtered unique index, migration, and refactor AuditWriterActor.WrapDetails away."},
|
||||
{"id": "F4", "subject": "Follow-up: Harden AuditWriterActor.WrapDetails JSON synthesis with System.Text.Json", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": ["F3"], "blockedBy": [], "commit": "f57f61d", "deviation": "Moot — F3 deleted WrapDetails entirely (EventId/CorrelationId now live in dedicated columns).", "origin": "Self-review of Task 33 — WrapDetails uses string concat; malformed caller DetailsJson would produce invalid JSON and trip the CK_ConfigAuditLog_DetailsJson_IsJson constraint, killing the entire flush batch. Discard this task if F3 lands first (F3 removes WrapDetails entirely)."},
|
||||
{"id": "F5", "subject": "Follow-up: ConfigPublishCoordinator multi-node happy-path test", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "5cfbe8b", "deviation": "Delivered by Task 59 — DeployHappyPathTests.StartDeployment_seals_after_both_nodes_apply exercises the exact 'dispatch to N driver nodes, all ack, seals' flow via the real 2-node TwoNodeClusterHarness rather than a multi-system TestKit. Cleaner because it tests the production code path end-to-end.", "origin": "Self-review of Task 30 — single-ActorSystem TestKit can't simulate the plan's 'dispatch to N driver nodes, all ack, seals' happy path because DiscoverDriverNodes() needs real cluster membership. Add a multi-system test (two ActorSystems joined into one cluster, driver-role on the second)."},
|
||||
{"id": "F6", "subject": "Follow-up: RedundancyStateActor publisher abstraction so tests don't need DPS bootstrap", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": [], "blockedBy": [], "commit": "dfc143c", "origin": "Self-review of Task 35 — RedundancyStateActorTests are skipped because single-node DistributedPubSub bootstrap is unreliable in TestKit. Inject an Action<object> broadcast so tests can replace it with a probe; un-skip both tests."},
|
||||
{"id": "F7", "subject": "Follow-up: DriverInstanceActor full engine wiring (subscriptions, writes, ApplyDelta diff)", "status": "completed", "classification": "standard", "estMinutes": 45, "parallelizableWith": [], "blockedBy": [44], "origin": "Self-review of Task 41 — subscription publishing, ApplyDelta diffing, bad-quality-on-disconnect, write path, and supervisor backoff are stubbed. Wire after OpcUaPublishActor lands.", "shipped": "All three pieces landed: (1) spawn lifecycle in DriverHostActor (DriverSpawnPlanner + IDriverFactory seam) — da14149, (2) ISubscribable wiring + OPC UA status-code → OpcUaQuality severity-bit mapping + DetachSubscription on disconnect/PostStop, (3) IWritable.WriteAsync write path with 5s timeout, status-code bubble-up, and AttributeValuePublished published to parent on every OnDataChange — both shipped in the F7-residual batch. Host DI binding (DriverFactoryBootstrap registers AbCip/AbLegacy/FOCAS/Galaxy/Modbus/S7/TwinCAT factories) lives in src/Server/ZB.MOM.WW.OtOpcUa.Host/Drivers/."},
|
||||
{"id": "F8", "subject": "Follow-up: VirtualTagActor engine wiring (compile expression, subscribe deps, publish result)", "status": "partial", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 42 — VirtualTagEngine.Evaluate not called; DependencyValueChanged just buffers.", "shipped": "(1) IVirtualTagEvaluator seam + NullVirtualTagEvaluator default. VirtualTagActor calls evaluator on DependencyValueChanged, dedupes unchanged results, emits EvaluationResult to parent, publishes Warning ScriptLogEntry on failure. (2) DependencyMuxActor in Runtime fans out DriverInstanceActor.AttributeValuePublished from DriverHostActor through to interested VirtualTagActor subscribers. VirtualTagActor takes dependencyRefs + mux ActorRef in Props, registers interest in PreStart, unregisters in PostStop. WithOtOpcUaRuntimeActors spawns the mux + threads it into DriverHostActor. Production binding to Core.VirtualTags.VirtualTagEngine (expression compile + dep extraction) still TODO — split as F8b."},
|
||||
{"id": "F9", "subject": "Follow-up: ScriptedAlarmActor engine wiring + state persistence", "status": "partial", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 43 — AlarmConditionService not called; PreRestart persistence to ScriptedAlarmState DB not wired; HistorianAdapter rows not emitted.", "shipped": "(1) IScriptedAlarmEvaluator seam + NullScriptedAlarmEvaluator default. ScriptedAlarmActor takes AlarmConfig (id/name/path/severity/predicate), evaluates on DependencyValueChanged, publishes AlarmTransitionEvent + ScriptLogEntry on every transition. (2) IAlarmActorStateStore seam in Commons.Engines + NullAlarmActorStateStore default + EfAlarmActorStateStore production adapter over the ScriptedAlarmState entity. ScriptedAlarmActor PreStart loads + restores; every Transition fires a fire-and-forget save with lastAckUser. Predicate binding to Core.ScriptedAlarms.ScriptedAlarmEngine still TODO — split as F9b."},
|
||||
{"id": "F10", "subject": "Follow-up: OpcUaPublishActor SDK integration (address-space writes + ServiceLevel + RebuildAddressSpace)", "status": "partial", "classification": "high-risk", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [47], "origin": "Self-review of Task 44 — SDK calls stubbed; counters only. Wire after Phase 7 OpcUaServer extraction.", "shipped": "(1) IOpcUaAddressSpaceSink + IServiceLevelPublisher seams in Commons.OpcUa with Null* defaults. OpcUaPublishActor routes through the sink, dedupes ServiceLevelChanged, subscribes to redundancy-state DPS topic, maps redundancy snapshot to a coarse ServiceLevel (Primary+leader=240, Primary=200, Secondary=100, Detached=0). (2) OtOpcUaNodeManager (CustomNodeManager2) + OtOpcUaSdkServer (StandardServer subclass) + SdkAddressSpaceSink in OpcUaServer — lazy variable creation on first WriteValue, WriteAlarmState shape, RebuildAddressSpace tear-down. Variable updates propagate via ClearChangeMasks so subscribed OPC UA clients see them. Tests boot a real StandardServer + verify sink writes show up in the manager. Production wiring through OpcUaApplicationHost.StartAsync (default server = OtOpcUaSdkServer) + IServiceLevelPublisher SDK binding + #109 OpcUaPublishActor→Phase7Applier integration are the remaining pieces."},
|
||||
{"id": "F11", "subject": "Follow-up: HistorianAdapterActor named-pipe IPC + SqliteStoreAndForwardSink wiring", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "6861381", "deviationNotes": "Reshaped HistorianAdapterActor around the existing IAlarmHistorianSink abstraction (alarm-event shape, not the original tag-history-row stub). Defaults to NullAlarmHistorianSink; production deployments wire SqliteStoreAndForwardSink + WonderwareHistorianClient via AddOtOpcUaRuntime overrides. Actor now exposes GetStatus returning HistorianSinkStatus for diagnostics. Named-pipe transport implementation lives unchanged in src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs — the actor is intentionally just a fire-and-forget bridge.", "origin": "Self-review of Task 45 — stub buffers in-memory; named-pipe + SQLite store-and-forward not wired."},
|
||||
{"id": "F12", "subject": "Follow-up: PeerOpcUaProbeActor real opc.tcp ping (replace Ok=true stub)", "status": "completed", "classification": "small", "estMinutes": 20, "parallelizableWith": [], "blockedBy": [], "commit": "b06e3ae", "deviation": "TCP-connect probe rather than full OPC UA Hello/Acknowledge handshake. Enough for the redundancy calc; deeper liveness signals can layer on later without changing the actor's contract.", "origin": "Self-review of Task 45 — RunProbe always returns Ok=true; replace with OPC UA Client connect."},
|
||||
{"id": "F13", "subject": "Follow-up: Full OpcUaApplicationHost extraction (security/alarms/history/observability)", "status": "partial", "classification": "high-risk", "estMinutes": 120, "parallelizableWith": [], "blockedBy": [], "commit": "36c4751-partial", "deviationNotes": "F13a (cert auto-creation) shipped in 36c4751. Remaining: endpoint-security wiring (SecurityProfileResolver into ServerConfiguration.SecurityPolicies), LDAP user-token validator (the OPC UA UserNameToken path; HTTP-layer LDAP auth is separate and already in OtOpcUa.Security), scripted-alarm node manager creation, history backend wiring, observability hooks (OpenTelemetry metrics + traces). These are gated by F10's OpcUaPublishActor SDK integration — until F10 lands, nothing instantiates OpcUaApplicationHost so the missing wiring is dead weight.", "origin": "Self-review of Task 46 — facade only boots ApplicationInstance + StandardServer. Legacy 391-line file pulls Server.Security/Alarms/History/Observability. Pull those into thin OpcUaServer interfaces."},
|
||||
{"id": "F14", "subject": "Follow-up: Migrate side-effecting Phase7Composer (EquipmentNodeWalker, trace logs, node cache)", "status": "partial", "classification": "standard", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 47 — pure version covers the projection. Walker + alarm sink registration + cache mutation stay in legacy until split into Phase7Applier.", "shipped": "Phase7Plan + Phase7Planner.Compute (pure diff over EquipmentNodes/DriverInstancePlans/ScriptedAlarmPlans by stable id, with Added/Removed/Changed lists). Phase7Applier consumes plan + IOpcUaAddressSpaceSink: drives RebuildAddressSpace on Equipment/Alarm topology change, writes inactive AlarmState for removed nodes, catches + logs sink faults. Driver-only changes correctly skip the rebuild (DriverHostActor's spawn-plan in Runtime handles those). Walker integration with the real SDK NodeManager is the remaining piece — split as F14b (consumes the existing EquipmentNodeWalker once F10b lands an SDK builder)."},
|
||||
{"id": "F15", "subject": "Follow-up: Migrate 47 legacy Admin Blazor components into AdminUI library", "status": "completed", "classification": "high-risk", "estMinutes": 180, "commit": "Phase A-D (read views) + F15.2 batches 1-4 (live-edit CRUD) + F15.3 (live alerts/script-log/CSV import/Monaco)", "deviationNotes": "All 4 phases of read-only views shipped: Phase A (shell/auth/fleet/hosts), B (cluster CRUD + Overview/Redundancy), C (Equipment/UNS/Namespaces/Drivers/Tags/ACLs), D (Audit/VirtualTags/ScriptedAlarms/Scripts/RoleGrants/Certificates/Reservations/AlarmsHistorian). Per Q1–Q5 of docs/v2/AdminUI-rebuild-plan.md: typed driver editors deferred, top-level VirtualTags/ScriptedAlarms kept (Q2 reversed for cross-cluster discoverability), routes-not-tabs adopted, fleet-wide LDAP→role map only, generic login errors. Live-edit forms (F15.2) and ScriptLog page (depends on F16 ScriptLogHub) are explicit follow-ups.", "parallelizableWith": [], "blockedBy": [], "origin": "Self-review of Task 48 — only MapAdminUI scaffold + 1 new page (Deployments). 47 pages stay in legacy Admin (accepted-broken) until Task 56."},
|
||||
{"id": "F16", "subject": "Follow-up: Bridge FleetStatusBroadcaster → SignalR hubs (FleetStatusHub / AlertHub / ScriptLogHub)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "f18c285", "deviation": "FleetStatusHub bridge landed. AlertHub + ScriptLogHub deferred — they need upstream message contracts that aren't defined yet (alerts emerge from F9 ScriptedAlarmActor, script logs from F8 VirtualTagActor).", "origin": "Self-review of Task 49 — hubs are passive Hub subclasses; the bridge from FleetStatusBroadcaster.broadcast → IHubContext is not wired."},
|
||||
{"id": "F17", "subject": "Follow-up: FleetDiagnosticsClient real Akka ActorSelection round-trip (GetDiagnosticsRequest)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "8f32b89", "origin": "Self-review of Task 51 — client returns an empty snapshot stub. Add GetDiagnosticsRequest contract + DriverHostActor handler + real Ask/Reply."},
|
||||
{"id": "F18", "subject": "Follow-up: Thread HttpContext.User.Identity.Name into Deployments page (createdBy)", "status": "completed", "classification": "small", "estMinutes": 5, "parallelizableWith": [], "blockedBy": [], "commit": "b266f63", "origin": "Self-review of Task 52 — Deployments.razor hardcodes createdBy=\"(current user)\"; needs @inject AuthenticationStateProvider."},
|
||||
{"id": "F19", "subject": "Follow-up: RuntimeStartup extension for driver-role node-actor spawn", "status": "completed", "classification": "standard", "estMinutes": 20, "parallelizableWith": [], "blockedBy": [], "commit": "09d6676", "origin": "Self-review of Task 53 — only admin-role singletons are wired via WithOtOpcUaControlPlaneSingletons. Driver-role nodes need a parallel WithOtOpcUaRuntimeActors that spawns DriverHostActor."},
|
||||
{"id": "F20", "subject": "Follow-up: Wire DriverInstanceActor.ShouldStub() into DriverHostActor child spawn", "status": "completed", "classification": "small", "estMinutes": 10, "parallelizableWith": ["F7"], "blockedBy": [], "origin": "Self-review of Task 55 — ShouldStub helper exists but nothing calls it. Folds into F7 when DriverHostActor learns to spawn DriverInstanceActor children.", "shipped": "DriverHostActor.SpawnChild now calls DriverInstanceActor.ShouldStub(type, _localRoles) and routes Windows-only driver types to the stub path on non-Windows / dev-role hosts. Verified by DriverHostActorReconcileTests.Galaxy_on_non_windows_is_stubbed_by_ShouldStub_check."},
|
||||
{"id": "F21", "subject": "Follow-up: docker-compose.yml for Host.IntegrationTests (real SQL Server + OpenLDAP)", "status": "completed", "classification": "standard", "estMinutes": 30, "parallelizableWith": [], "blockedBy": [], "commit": "b0a2bb0", "deviationNotes": "Stack shipped (SQL on 14331, OpenLDAP on 3894). HarnessMode reads OTOPCUA_HARNESS_USE_SQL=1 / USE_LDAP=1 from env; SQL mode uses per-harness unique DB via EnsureCreated. Compose itself not local-validated — DESKTOP-6JL3KKO has no Docker per CLAUDE.md; CI on Linux will exercise the real path. The xunit test-trait split was punted — env vars are simpler and cover the same use case (one suite, two modes, no test-class duplication).", "origin": "Deviation from Task 58 — TwoNodeClusterHarness uses EF InMemoryDatabase + StubLdapAuthService. For Mac-friendly local runs against real SQL constraints + LDAP, add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/docker-compose.yml (SQL Server + OpenLDAP), wire EF SqlServer provider behind an env var (OTOPCUA_HARNESS_USE_SQL=1), and add a test trait so CI can run both modes."},
|
||||
{"id": "F22", "subject": "Follow-up: failover scenario integration tests (kill-mid-apply, split-brain, restart-during-deploy)", "status": "completed", "classification": "standard", "estMinutes": 60, "parallelizableWith": [], "blockedBy": [], "commit": "cd5540c", "deviationNotes": "Shipped 3 scenarios on the existing 2-node harness: stop-shrinks, restart-rejoins-same-port, deploy-with-one-node-down. Split-brain via simulated partition deferred — Akka.Hosting + xunit don't expose transport-level interference cleanly. The graceful-shutdown + rejoin path is what production actually exercises; ungraceful kill-mid-apply non-deterministic under SBR's 15s stable-after.", "origin": "Deviation from Task 59 — happy-path + idempotency landed but design §8 cases 3-7 need controlled node-down primitives on TwoNodeClusterHarness (StopNodeAsync, RestartNodeAsync, PartitionBetweenAsync). Add those + 5 scenario tests."}
|
||||
]
|
||||
}
|
||||
+20
-18
@@ -1,5 +1,19 @@
|
||||
# Security
|
||||
|
||||
> **v2 status (2026-05-26).** The four security concerns below are unchanged in v2.
|
||||
> Paths + project names moved: `OtOpcUa.Server/Security/` → `OtOpcUa.Security/`
|
||||
> (`Ldap/`, `Jwt/`, `Endpoints/AuthEndpoints.cs`), `OtOpcUa.Admin` is gone (its
|
||||
> auth + role-grant pages live in `OtOpcUa.AdminUI`), and Admin auth policies
|
||||
> register in `OtOpcUa.Host/Program.cs` via `AddOtOpcUaAuth` rather than in a
|
||||
> separate Admin process. The v2 `Security:Jwt` section adds JWT bearer auth
|
||||
> alongside the existing cookie scheme (`AddJwtBearer` wired via
|
||||
> `IPostConfigureOptions<JwtBearerOptions>` in `OtOpcUa.Security`). DataProtection
|
||||
> keys persist to the shared `ConfigDb.DataProtectionKeys` table so cookies
|
||||
> survive failover between admin-role nodes.
|
||||
>
|
||||
> See `docs/plans/2026-05-26-akka-hosting-alignment-design.md` §5 for the v2
|
||||
> auth + DataProtection rationale.
|
||||
|
||||
OtOpcUa has four independent security concerns. This document covers all four:
|
||||
|
||||
1. **Transport security** — OPC UA secure channel (signing, encryption, X.509 trust).
|
||||
@@ -95,7 +109,7 @@ The Server accepts three OPC UA identity-token types:
|
||||
| Token | Handler | Notes |
|
||||
|---|---|---|
|
||||
| Anonymous | `IUserAuthenticator.AuthenticateAsync(username: "", password: "")` | Refused in strict mode unless explicit anonymous grants exist; allowed in lax mode for backward compatibility. |
|
||||
| UserName/Password | `LdapUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/LdapUserAuthenticator.cs`) | LDAP bind + group lookup; resolved `LdapGroups` flow into the session's identity bearer (`ILdapGroupsBearer`). |
|
||||
| UserName/Password | `LdapOpcUaUserAuthenticator` (`src/Server/ZB.MOM.WW.OtOpcUa.Host/OpcUa/LdapOpcUaUserAuthenticator.cs`, backed by `LdapAuthService` at `src/Server/ZB.MOM.WW.OtOpcUa.Security/Ldap/LdapAuthService.cs`) | LDAP bind + group lookup; resolved `LdapGroups` flow into the session's identity bearer (`ILdapGroupsBearer`). |
|
||||
| X.509 Certificate | Stack-level acceptance + role mapping via CN | X.509 identity carries `AuthenticatedUser` + read roles; finer-grain authorization happens through the data-plane ACLs. |
|
||||
|
||||
### LDAP bind flow (`LdapUserAuthenticator`)
|
||||
@@ -207,20 +221,16 @@ The three Write tiers map to Galaxy's v1 `SecurityClassification` — `FreeAcces
|
||||
|
||||
`NodeScope` carries `(ClusterId, NamespaceId, AreaId, LineId, EquipmentId, TagId)`; any suffix may be null — a tag-level ACL is more specific than an area-level ACL but both contribute via union.
|
||||
|
||||
### Dispatch gate — `AuthorizationGate`
|
||||
### Dispatch gate — `IPermissionEvaluator`
|
||||
|
||||
`src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/AuthorizationGate.cs` bridges the OPC UA stack's `ISystemContext.UserIdentity` to the evaluator. `DriverNodeManager` holds exactly one reference to it and calls `IsAllowed(identity, OpcUaOperation.*, NodeScope)` on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call path. A false return short-circuits the dispatch with `BadUserAccessDenied`.
|
||||
`IPermissionEvaluator.Authorize(session, operation, scope)` (default impl `TriePermissionEvaluator` at `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs`) bridges the OPC UA stack's `ISystemContext.UserIdentity` to the trie. The dispatch path calls it on every Read, Write, HistoryRead, Browse, Subscribe, AckAlarm, Call. A non-allow decision short-circuits the dispatch with `BadUserAccessDenied`.
|
||||
|
||||
Key properties:
|
||||
|
||||
- **Driver-agnostic.** No driver-level code participates in authorization decisions. Drivers report `SecurityClassification` as metadata on tag discovery; everything else flows through `AuthorizationGate`.
|
||||
- **Driver-agnostic.** No driver-level code participates in authorization decisions. Drivers report `SecurityClassification` as metadata on tag discovery; everything else flows through the evaluator.
|
||||
- **Fail-open-during-transition.** `StrictMode = false` (default during ACL rollouts) lets sessions without resolved LDAP groups proceed; flip `Authorization:StrictMode = true` in production once ACLs are populated.
|
||||
- **Evaluator stays pure.** `TriePermissionEvaluator` has no OPC UA stack dependency — it's tested directly from xUnit.
|
||||
|
||||
### Probe-this-permission (Admin UI)
|
||||
|
||||
`PermissionProbeService` (`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/PermissionProbeService.cs`) lets an operator ask "if a user with groups X, Y, Z asked to do operation O on node N, would it succeed?" The answer is rendered in the AclsTab "Probe" dialog — same evaluator, same trie, so the Admin UI answer and the live Server answer cannot disagree.
|
||||
|
||||
### Full model
|
||||
|
||||
See [`docs/v2/acl-design.md`](v2/acl-design.md) for the complete design: trie invalidation, flag semantics, per-path override rules, and the reasoning behind additive-only (no Deny).
|
||||
@@ -235,7 +245,7 @@ Per decision #150 control-plane roles are **deliberately independent of data-pla
|
||||
|
||||
### Roles
|
||||
|
||||
`src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/AdminRoles.cs`:
|
||||
The `AdminRole` enum (`src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Enums/AdminRole.cs`) defines:
|
||||
|
||||
| Role | Capabilities |
|
||||
|---|---|
|
||||
@@ -243,15 +253,7 @@ Per decision #150 control-plane roles are **deliberately independent of data-pla
|
||||
| `ConfigEditor` | ConfigViewer plus draft editing (UNS, equipment, tags, ACLs, driver instances, reservations, CSV imports). Cannot publish. |
|
||||
| `FleetAdmin` | ConfigEditor plus publish, cluster/node CRUD, credential management, role-grant management. |
|
||||
|
||||
Policies registered in Admin `Program.cs`:
|
||||
|
||||
```csharp
|
||||
builder.Services.AddAuthorizationBuilder()
|
||||
.AddPolicy("CanEdit", p => p.RequireRole(AdminRoles.ConfigEditor, AdminRoles.FleetAdmin))
|
||||
.AddPolicy("CanPublish", p => p.RequireRole(AdminRoles.FleetAdmin));
|
||||
```
|
||||
|
||||
Razor pages and API endpoints gate with `[Authorize(Policy = "CanEdit")]` / `"CanPublish"`; nav-menu sections hide via `<AuthorizeView>`.
|
||||
In v2 the authentication + authorization stack is wired centrally by `AddOtOpcUaAuth` (`src/Server/ZB.MOM.WW.OtOpcUa.Security/ServiceCollectionExtensions.cs`) and Razor pages gate inline with the role names, e.g. `@attribute [Authorize(Roles = "FleetAdmin,ConfigEditor")]` on `Deployments.razor`. Nav-menu sections hide via `<AuthorizeView>`.
|
||||
|
||||
### Role grant source
|
||||
|
||||
|
||||
@@ -0,0 +1,265 @@
|
||||
# Admin UI rebuild plan (F15)
|
||||
|
||||
**Status:** UX kickoff — proposals to react to before any per-page rebuild starts.
|
||||
**Last updated:** 2026-05-26 on `v2-akka-fuse`.
|
||||
|
||||
## Why this isn't a straight port
|
||||
|
||||
The v1 Admin UI was built around `ConfigGeneration` draft → publish:
|
||||
operators edited a **draft** generation, the system computed a **diff** against the
|
||||
last published one, and a manual **Publish** sealed it. Six full pages
|
||||
(`DraftEditor`, `DiffViewer`, `DiffSection`, `Generations`, plus the per-tab
|
||||
"viewing draft N" header) lived to make this workflow legible.
|
||||
|
||||
v2 replaces that with **live-edit + snapshot-deploy** (decisions #14a–#14e on this
|
||||
branch). Edits write directly to live tables guarded by `RowVersion`
|
||||
concurrency; deploying is a single click that snapshots the current live state
|
||||
and dispatches it via Akka. Drift between "current live" and "last sealed
|
||||
deployment" surfaces as a one-line indicator on the
|
||||
[Deployments](../../src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Deployments.razor)
|
||||
page.
|
||||
|
||||
That collapses **six pages → zero** before we ship a line of new Razor. The
|
||||
remaining ~41 legacy pages map to ~30 v2 pages once redundant fleet-wide views
|
||||
fold into their cluster-tab equivalents.
|
||||
|
||||
## Inventory: 47 legacy pages → v2 disposition
|
||||
|
||||
Source: `git show 76310b8^ -- 'src/Server/ZB.MOM.WW.OtOpcUa.Admin/**/*.razor'`.
|
||||
|
||||
### Site shell (5 files) — port
|
||||
|
||||
| Legacy | v2 status | Notes |
|
||||
|---|---|---|
|
||||
| `App.razor`, `Routes.razor`, `_Imports.razor` | Port | Boilerplate; minor render-mode tweaks |
|
||||
| `Layout/MainLayout.razor` | ✅ Already in v2 | Done in Task 48 |
|
||||
| `Components/Pages/Login.razor`, `Account.razor` | Port | Auth endpoints changed (cookie+JWT hybrid, Task 26); login form posts to `/auth/login` now |
|
||||
|
||||
### Shared widgets (5 files) — port
|
||||
|
||||
| Legacy | v2 status |
|
||||
|---|---|
|
||||
| `StatusBadge.razor` | ✅ Already in v2 |
|
||||
| `LoadingSpinner.razor` | ✅ Already in v2 |
|
||||
| `ToastNotification.razor` | ✅ Already in v2 |
|
||||
| `ClusterAuthorizeView.razor`, `RedirectToLogin.razor` | Port — adjust for v2 `IUserAuthenticator` |
|
||||
|
||||
### Fleet (1 file) — reshape
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Fleet.razor` | **Reshape.** Drop the v1 "poller reads central DB" data source. v2 reads `NodeDeploymentState` (Applied / Failed / Stale per node) + subscribes to `FleetStatusHub` for live `ServiceLevel` updates (already wired in F16) + queries `IFleetDiagnosticsClient.GetDiagnostics` (F17) for per-node driver health. Single page, similar shape to v1. |
|
||||
|
||||
### Cluster CRUD (3 files) — port
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `ClustersList.razor` | Port |
|
||||
| `NewCluster.razor` | Port |
|
||||
| `ClusterDetail.razor` | **Port — drop draft/publish chrome.** No "New draft" button; no "current published" sidebar. Replace with "Last deployed" badge + a "Deploy" button (already a SignalR-aware widget on the Deployments page; this becomes a cluster-scoped variant). |
|
||||
|
||||
### Draft/publish workflow (4 files) — **drop entirely**
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `DraftEditor.razor` | **Drop.** No drafts in v2. |
|
||||
| `DiffViewer.razor` | **Drop.** Drift indicator replaces it on Deployments page. |
|
||||
| `DiffSection.razor` | **Drop.** |
|
||||
| `Generations.razor` | **Drop — replaced by `Deployments.razor`** (already shipped in v2 ahead of F15). |
|
||||
|
||||
### Cluster tabs (11 files) — port as live-edit forms
|
||||
|
||||
Each becomes a live-edit surface: load the entity, bind to a form, save with
|
||||
`RowVersion` concurrency check (409 on conflict → toast + reload). No "viewing
|
||||
draft N" header; no per-tab snapshot view.
|
||||
|
||||
| Legacy tab | v2 strategy |
|
||||
|---|---|
|
||||
| `EquipmentTab.razor` | Port — UNS path tree picker stays |
|
||||
| `UnsTab.razor` | Port — same |
|
||||
| `NamespacesTab.razor` | Port |
|
||||
| `DriversTab.razor` | Port — **driver-type-specific editors are a separate question (see below)** |
|
||||
| `TagsTab.razor` | Port |
|
||||
| `AclsTab.razor` | Port — wire to v2 LDAP group → role mapping (see RoleGrants question) |
|
||||
| `RedundancyTab.razor` | Port — surface v2 `ServiceLevel` calc (Task 35) instead of v1 redundancy state machine |
|
||||
| `ScriptedAlarmsTab.razor` | Port |
|
||||
| `ScriptsTab.razor` | Port |
|
||||
| `VirtualTagsTab.razor` | Port |
|
||||
| `AuditTab.razor` | Port — wire to v2 `ConfigAuditLog` (post-F3 schema: `EventId`, `CorrelationId` columns) |
|
||||
|
||||
### Cluster-scoped editors (3 files) — port as reusable inputs
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `IdentificationFields.razor` | Port |
|
||||
| `ImportEquipment.razor` | Port |
|
||||
| `ScriptEditor.razor` | Port |
|
||||
|
||||
### Cross-cluster pages (8 files) — mixed
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Hosts.razor` | Port — reshape to "Akka cluster members" (showing `host:port` NodeIds, roles, redundancy state) |
|
||||
| `Certificates.razor` | Port — F13a's `PkiStoreRoot` becomes the data source |
|
||||
| `Reservations.razor` | Port |
|
||||
| `RoleGrants.razor` | **Reshape.** v1 was cluster-scoped role grants; v2 uses LDAP group → role mapping (see Q4 below) |
|
||||
| `AlarmsHistorian.razor` | Port — wire to F11's `HistorianAdapterActor.GetStatus` (queue depth + drain state) |
|
||||
| `ScriptLog.razor` | Port — needs SignalR hub bridge (F16 deferred ScriptLogHub) |
|
||||
| `ScriptedAlarms.razor` (top-level) | **Possibly drop** (see Q2 below) |
|
||||
| `VirtualTags.razor` (top-level) | **Possibly drop** (see Q2 below) |
|
||||
|
||||
### Driver-typed editors (5 files) — sequencing decision needed
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Drivers/FocasDetail.razor` | Defer — JSON editor in `DriversTab` covers the same config initially |
|
||||
| `Modbus/ModbusOptionsEditor.razor` | Same |
|
||||
| `Modbus/ModbusAddressEditor.razor` | Same |
|
||||
| `Modbus/ModbusAddressPreview.razor` | Same |
|
||||
| `Modbus/ModbusDiagnostics.razor` | Port — separate from the config editor, this is operational telemetry |
|
||||
|
||||
### Account (1 file) — port
|
||||
|
||||
| Legacy | v2 strategy |
|
||||
|---|---|
|
||||
| `Account.razor` | Port — minor reshape for JWT (token expiry UI, refresh button) |
|
||||
|
||||
## Summary by disposition
|
||||
|
||||
| Disposition | Count |
|
||||
|---|---|
|
||||
| Already in v2 | 5 |
|
||||
| Port as-is | 22 |
|
||||
| Port + reshape | 7 |
|
||||
| **Drop (replaced by live-edit / Deployments page)** | **5** |
|
||||
| Drop (redundant with cluster tab) | 2 (pending Q2) |
|
||||
| Defer (driver-typed editors) | 4 |
|
||||
| **Total active rebuild** | ~30 pages |
|
||||
|
||||
## Open design questions
|
||||
|
||||
These need answers before per-page sequencing starts. They drive how many
|
||||
phases the rebuild takes and what gets cut.
|
||||
|
||||
### Q1 — Driver-typed editors: ship now or defer?
|
||||
|
||||
**Context.** v1 had typed editors for Modbus + FOCAS driver config. They sat
|
||||
behind a generic JSON editor for the other six driver types. The typed editors
|
||||
caught operator typos that the JSON editor missed (port ranges, slave-ID
|
||||
collisions, address-map overlaps).
|
||||
|
||||
**Options.**
|
||||
- **Defer all typed editors.** Ship `DriversTab` with a JSON editor first; add
|
||||
typed editors per-driver as field requests come in. Saves ~1 day on F15.
|
||||
- **Port the existing two.** Modbus + FOCAS were already validated against
|
||||
field use. The other six driver types stay JSON-only.
|
||||
- **Ship all eight typed editors.** Most work, best UX. ~3 extra days on F15.
|
||||
|
||||
**Recommendation:** Defer. The OPC UA dual-endpoint tests + driver
|
||||
engine wiring (F7-F10) are higher-leverage and need attention first.
|
||||
|
||||
### Q2 — Top-level `ScriptedAlarms.razor` and `VirtualTags.razor`: keep or drop?
|
||||
|
||||
**Context.** In v1, these were fleet-wide views of every scripted alarm and
|
||||
virtual tag across every cluster. The cluster tabs let you edit them; the
|
||||
top-level pages let you find them across clusters.
|
||||
|
||||
**Options.**
|
||||
- **Drop.** Fleet-wide view is rare; cluster scope covers 95% of use.
|
||||
- **Keep as read-only.** Cross-cluster search + drill-down to the per-cluster tab.
|
||||
|
||||
**Recommendation:** Drop, but expose a global search on the top nav that
|
||||
matches cluster + alarm/tag names if operators ask.
|
||||
|
||||
### Q3 — ClusterDetail: 10 tabs or split routes?
|
||||
|
||||
**Context.** v1 had 10 nav-tabs inside `ClusterDetail.razor`. Some are very
|
||||
heavy (Tags can be 10k rows; AuditTab streams). All 10 share render state.
|
||||
|
||||
**Options.**
|
||||
- **Keep tabs.** Familiar; one URL per cluster.
|
||||
- **Split into routes.** `/clusters/{id}/equipment`, `/clusters/{id}/tags`,
|
||||
etc. Better deep-linking, better load (one tab's data per page), easier auth
|
||||
scoping.
|
||||
|
||||
**Recommendation:** Split into routes. The v1 monolith was already groaning
|
||||
under the live-update SignalR fan-in; routes let each surface manage its own
|
||||
subscription lifecycle.
|
||||
|
||||
### Q4 — RoleGrants: cluster-scoped table or LDAP group → role map?
|
||||
|
||||
**Context.** v1 had a per-cluster `RoleGrants` table where you mapped users to
|
||||
cluster-scoped roles (ClusterAdmin, ClusterOperator, etc.). v2 introduced
|
||||
LDAP-driven auth: LDAP group membership maps to OPC UA permissions
|
||||
(`ReadOnly`, `WriteOperate`, `WriteTune`, `WriteConfigure`, `AlarmAck`)
|
||||
fleet-wide.
|
||||
|
||||
**Options.**
|
||||
- **Keep v1 model.** Cluster-scoped grants survive; LDAP just provides the
|
||||
username.
|
||||
- **Replace with fleet-wide LDAP-group → role mapping.** v2's `LdapOptions`
|
||||
already has a `GroupToRole` dictionary; surface that in a single fleet-level
|
||||
page.
|
||||
- **Both.** LDAP map for fleet-wide defaults; per-cluster overrides for
|
||||
scoping.
|
||||
|
||||
**Recommendation:** Fleet-wide LDAP-group → role map only. Per-cluster scoping
|
||||
adds combinatorial complexity that v2's redundancy model doesn't need
|
||||
(every driver-role node runs every driver in the fleet).
|
||||
|
||||
### Q5 — Login UI: backed by `/auth/login` (cookie+JWT hybrid) — what about LDAP error UX?
|
||||
|
||||
**Context.** v2's `/auth/login` does an LDAP bind. Failures come back as
|
||||
specific reasons (invalid creds vs. service-account misconfig vs. server
|
||||
unreachable). The default behavior is to lump them all into "Login failed."
|
||||
|
||||
**Options.**
|
||||
- **Generic "Login failed."** Safer; doesn't leak whether the username exists.
|
||||
- **Specific error categories.** Helps operators diagnose deploy issues.
|
||||
|
||||
**Recommendation:** Generic for production deployments, specific when
|
||||
`Authentication:Ldap:AllowInsecureLdap=true` (dev mode signal).
|
||||
|
||||
## Proposed sequencing (4 phases)
|
||||
|
||||
Each phase is independently mergeable. The branch ships when Phase A is in;
|
||||
Phases B–D can follow as smaller PRs.
|
||||
|
||||
### Phase A — Shell + auth + fleet (minimum-viable Admin)
|
||||
~½–1 day. Ships a working admin surface with no config editing.
|
||||
- Port `App.razor`, `Routes.razor`, `_Imports.razor`
|
||||
- Port `Login.razor` (post Q5)
|
||||
- Port `Account.razor`
|
||||
- Reshape `Fleet.razor` against v2 data sources
|
||||
- Port `Hosts.razor` reshape
|
||||
|
||||
### Phase B — Cluster CRUD + Overview/Redundancy tabs
|
||||
~1 day. Adds cluster browse + readonly redundancy view.
|
||||
- Port `ClustersList`, `NewCluster`, `ClusterDetail` (Overview tab only)
|
||||
- Port `RedundancyTab` (read-only — surfaces v2 `ServiceLevel`)
|
||||
- Split into routes if Q3 = split
|
||||
|
||||
### Phase C — Config editor tabs
|
||||
~2 days. The big chunk — the live-edit config surface.
|
||||
- `EquipmentTab`, `UnsTab`, `NamespacesTab`
|
||||
- `DriversTab` (JSON-only initially per Q1)
|
||||
- `TagsTab`
|
||||
- `AclsTab` post Q4 reshape
|
||||
- `ImportEquipment`, `IdentificationFields`
|
||||
|
||||
### Phase D — Logic + ops pages
|
||||
~1 day.
|
||||
- `VirtualTagsTab`, `ScriptedAlarmsTab`, `ScriptsTab`, `ScriptEditor`
|
||||
- `AuditTab` against new ConfigAuditLog schema
|
||||
- `RoleGrants` post Q4 reshape
|
||||
- `Certificates`
|
||||
- `Reservations`
|
||||
- `AlarmsHistorian`, `ScriptLog` (depends on F16 ScriptLogHub deferred)
|
||||
|
||||
## Out of scope for F15
|
||||
|
||||
- Typed driver editors (Q1, deferred unless reversed)
|
||||
- Top-level fleet-wide ScriptedAlarms / VirtualTags pages (Q2, recommended drop)
|
||||
- Per-cluster RoleGrants (Q4, recommended drop)
|
||||
- ScriptLogHub SignalR bridge (F16 deferred — only needed for Phase D's
|
||||
ScriptLog page; can move to a separate F16-extension follow-up)
|
||||
@@ -0,0 +1,128 @@
|
||||
# OtOpcUa v2 Architecture
|
||||
|
||||
Single-page tour of the v2 layout. For decision history + tradeoffs, see [`2026-05-26-akka-hosting-alignment-design.md`](../plans/2026-05-26-akka-hosting-alignment-design.md).
|
||||
|
||||
## Big picture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ OtOpcUa.Host │ (fused binary)
|
||||
│ │
|
||||
│ reads OTOPCUA_ROLES env, mounts: │
|
||||
│ ┌─────────────────────────────────────┐ │
|
||||
│ │ admin → Blazor + auth + control- │ │
|
||||
│ │ plane singletons │ │
|
||||
│ │ driver → OPC UA endpoint + │ │
|
||||
│ │ per-node actors │ │
|
||||
│ └─────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────┘
|
||||
│
|
||||
│ joins
|
||||
▼
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Akka.NET cluster │
|
||||
│ (split-brain resolver: keep-oldest, 15s) │
|
||||
└─────────────────────────────────────────────┘
|
||||
|
||||
shared by every node: ┌─────────────────┐
|
||||
│ ConfigDb (SQL) │ live-edit + Deployment artifacts + audit
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
The v1 setup was two separate Windows services (`OtOpcUa.Server` + `OtOpcUa.Admin`) talking through the DB. v2 collapses them into one binary with role gating, and adds an Akka cluster so admin singletons can drive deploys and the redundancy story is automatic.
|
||||
|
||||
## Project layout
|
||||
|
||||
```
|
||||
src/Core/ shared abstractions, no Server deps
|
||||
ZB.MOM.WW.OtOpcUa.Commons types + Akka message contracts + interfaces
|
||||
ZB.MOM.WW.OtOpcUa.Cluster HOCON, AkkaClusterOptions, IClusterRoleInfo
|
||||
ZB.MOM.WW.OtOpcUa.Configuration EF Core DbContext + entities
|
||||
|
||||
src/Server/ server-side projects
|
||||
ZB.MOM.WW.OtOpcUa.Security cookie+JWT auth, LDAP, JwtTokenService
|
||||
ZB.MOM.WW.OtOpcUa.ControlPlane admin-role cluster singletons
|
||||
ZB.MOM.WW.OtOpcUa.Runtime driver-role per-node actors
|
||||
ZB.MOM.WW.OtOpcUa.OpcUaServer OPC UA endpoint facade + Phase7Composer
|
||||
ZB.MOM.WW.OtOpcUa.AdminUI Blazor Razor class library
|
||||
ZB.MOM.WW.OtOpcUa.Host fused binary (Program.cs)
|
||||
```
|
||||
|
||||
| Project | Role | Doc |
|
||||
|---|---|---|
|
||||
| Cluster | Bootstrap + cluster topology view | [Cluster.md](Cluster.md) |
|
||||
| ControlPlane | Admin singletons (deploy, audit, fleet, redundancy) | [ControlPlane.md](ControlPlane.md) |
|
||||
| Runtime | Driver-role actor tree | [Runtime.md](Runtime.md) |
|
||||
| Security | Cookie+JWT auth, LDAP, /auth/* endpoints | [../security.md](../security.md) |
|
||||
| OpcUaServer | OPC UA endpoint host + composer | [../OpcUaServer.md](../OpcUaServer.md) |
|
||||
| Host | Role-gated DI graph + Program.cs | [../ServiceHosting.md](../ServiceHosting.md) |
|
||||
|
||||
## Role gating
|
||||
|
||||
`Program.cs` reads `OTOPCUA_ROLES` once (per process) and decides what to wire:
|
||||
|
||||
```csharp
|
||||
var roles = RoleParser.Parse(Environment.GetEnvironmentVariable("OTOPCUA_ROLES"));
|
||||
var hasAdmin = roles.Contains("admin");
|
||||
var hasDriver = roles.Contains("driver");
|
||||
|
||||
builder.Services.AddOtOpcUaConfigDb(builder.Configuration);
|
||||
builder.Services.AddOtOpcUaCluster(builder.Configuration);
|
||||
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp); // HOCON + remote + cluster options
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
|
||||
if (hasAdmin)
|
||||
{
|
||||
builder.Services.AddOtOpcUaAuth(builder.Configuration);
|
||||
builder.Services.AddAdminUI();
|
||||
// SignalR, AdminOpsClient, etc.
|
||||
}
|
||||
|
||||
builder.Services.AddOtOpcUaHealth();
|
||||
```
|
||||
|
||||
There is a **single** ActorSystem. Cluster singletons + per-node actors share it via the `Akka.Hosting` registry. This was a v2 fix (the initial Phase 9 wiring ran two ActorSystems by mistake; see commit `d6fac2d`).
|
||||
|
||||
## Live-edit vs draft/publish
|
||||
|
||||
v1 had `ConfigGeneration(Draft|Published)` with every live-edit entity FK'd to a generation. Edits accumulated in a Draft until Publish promoted them.
|
||||
|
||||
v2 removes that entirely:
|
||||
|
||||
- No `ConfigGeneration` table, no `GenerationId` columns.
|
||||
- Every live-edit entity has a `RowVersion` (`IsRowVersion()`) for last-write-wins.
|
||||
- Audit goes to `ConfigEdit` (per-row delta) and `ConfigAuditLog` (event-level).
|
||||
- Deploys snapshot the *current* DB state into an immutable `Deployment.ArtifactBlob` + its `RevisionHash`. That artifact is what driver nodes apply.
|
||||
|
||||
See [ControlPlane.md § Deploy flow](ControlPlane.md#deploy-flow) for the end-to-end dispatch + ACK + seal sequence.
|
||||
|
||||
## NodeId
|
||||
|
||||
Each cluster member has a `NodeId` derived as `{PublicHostname}:{Port}` of the Akka remote endpoint. `ClusterRoleInfo.LocalNode` + `ConfigPublishCoordinator.DiscoverDriverNodes()` use the same formula so they always agree. The port suffix makes loopback test deployments distinguishable (commit `5cfbe8b`); in production the hostname alone is already unique.
|
||||
|
||||
## Health endpoints
|
||||
|
||||
| Path | Returns 200 when… |
|
||||
|---|---|
|
||||
| `/healthz` | Process is alive (no checks). |
|
||||
| `/health/ready` | DB reachable + this node is `Up` in the cluster. |
|
||||
| `/health/active` | This node is the admin role-leader (used by Traefik/HA-LB to pin traffic). |
|
||||
|
||||
## What lives where (quick map)
|
||||
|
||||
| Concern | Project | Entry point |
|
||||
|---|---|---|
|
||||
| Read OTOPCUA_ROLES | `Cluster.RoleParser` | static `Parse(string?)` |
|
||||
| Cluster lifecycle | `Cluster.WithOtOpcUaClusterBootstrap` | extension on `AkkaConfigurationBuilder` |
|
||||
| Local node identity | `Cluster.IClusterRoleInfo.LocalNode` | DI singleton |
|
||||
| Admin singletons | `ControlPlane.WithOtOpcUaControlPlaneSingletons` | extension on `AkkaConfigurationBuilder` |
|
||||
| Driver actors | `Runtime.WithOtOpcUaRuntimeActors` | extension on `AkkaConfigurationBuilder` |
|
||||
| Auth pipeline | `Security.AddOtOpcUaAuth` + `MapOtOpcUaAuth` | extensions on `IServiceCollection` / `IEndpointRouteBuilder` |
|
||||
| OPC UA facade | `OpcUaServer.OpcUaApplicationHost` | runtime host, started by driver-role startup |
|
||||
| Partner-URI advertising | `OpcUaServer.OpcUaApplicationHost.PopulateServerArray` | runs after `_application.Start`, appends `PeerApplicationUris` to the SDK `ServerUris` `StringTable` so `Server.ServerArray` (i=2254) returns self + peers |
|
||||
| Health endpoints | `Host.Health.AddOtOpcUaHealth` + `MapOtOpcUaHealth` | extensions on `IServiceCollection` / `IEndpointRouteBuilder` |
|
||||
@@ -0,0 +1,104 @@
|
||||
# OtOpcUa.Cluster
|
||||
|
||||
Akka.NET cluster bootstrap + topology view. Used by every other server-side project to talk to the live cluster.
|
||||
|
||||
Path: `src/Core/ZB.MOM.WW.OtOpcUa.Cluster/`
|
||||
|
||||
## Public surface
|
||||
|
||||
| Type | Role |
|
||||
|---|---|
|
||||
| `AkkaClusterOptions` | DI-bound options from `appsettings.json::Cluster`. Hostname/Port/PublicHostname/SeedNodes/Roles. |
|
||||
| `IClusterRoleInfo` (interface in Commons) | Live view of cluster membership + role-leader topology. Thread-safe + event-raising. |
|
||||
| `ClusterRoleInfo` | Implementation. Subscribes to `ClusterEvent.IMemberEvent` + `RoleLeaderChanged` + `LeaderChanged`. |
|
||||
| `HoconLoader.LoadBaseConfig()` | Reads the embedded `Resources/akka.conf`. |
|
||||
| `RoleParser.Parse(string?)` | Parses `OTOPCUA_ROLES` env var into a deduped `string[]`. |
|
||||
| `ServiceCollectionExtensions.AddOtOpcUaCluster(configuration)` | Binds options + registers `IClusterRoleInfo` singleton. **Does not** start an ActorSystem. |
|
||||
| `WithOtOpcUaClusterBootstrap(serviceProvider)` | Extension on `AkkaConfigurationBuilder`. Loads embedded HOCON + applies `WithRemoting(...)` + `WithClustering(...)` from options. |
|
||||
|
||||
## Bootstrap flow
|
||||
|
||||
```csharp
|
||||
// Program.cs
|
||||
builder.Services.AddOtOpcUaCluster(builder.Configuration);
|
||||
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp); // HOCON + remote + cluster
|
||||
// …singletons + node actors layered on
|
||||
});
|
||||
```
|
||||
|
||||
Order matters: `AddOtOpcUaCluster` must come before `AddAkka` so the options binding has run by the time the `AddAkka` lambda fires. Inside the lambda, `WithOtOpcUaClusterBootstrap` resolves `IOptions<AkkaClusterOptions>` from `sp` and writes them into the Akka builder.
|
||||
|
||||
The single ActorSystem this produces is what every other v2 piece runs on. There is no second Akka instance — that was a Phase 9 bug (commit `d6fac2d` consolidated).
|
||||
|
||||
## Embedded HOCON
|
||||
|
||||
`src/Core/ZB.MOM.WW.OtOpcUa.Cluster/Resources/akka.conf` contains:
|
||||
|
||||
| Setting | Value | Why |
|
||||
|---|---|---|
|
||||
| `akka.actor.provider` | `cluster` | Required for `Cluster.Get(system)` to work. |
|
||||
| `akka.cluster.split-brain-resolver.active-strategy` | `keep-oldest` | Smaller/younger side downs itself on partition. |
|
||||
| `akka.cluster.split-brain-resolver.stable-after` | `15s` | Time before SBR acts. |
|
||||
| `akka.cluster.failure-detector.threshold` | `10.0` | Higher than default (8.0) for GC-pause tolerance. |
|
||||
| `opcua-synchronized-dispatcher.type` | `PinnedDispatcher` | Dedicated thread for `OpcUaPublishActor` so SDK calls stay marshalled. |
|
||||
|
||||
The Cluster.Tests project verifies these key values stay correct (`HoconLoaderTests`).
|
||||
|
||||
## Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"Cluster": {
|
||||
"Hostname": "0.0.0.0",
|
||||
"Port": 4053,
|
||||
"PublicHostname": "node-a.lan",
|
||||
"SeedNodes": ["akka.tcp://otopcua@node-a.lan:4053"],
|
||||
"Roles": ["admin", "driver"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `Hostname`: interface to bind. `0.0.0.0` listens on every interface.
|
||||
- `Port`: TCP port for cluster gossip. Default 4053.
|
||||
- `PublicHostname`: address advertised in cluster gossip. Must be reachable by every other node.
|
||||
- `SeedNodes`: where new nodes go to join. List one (or two) stable nodes. First node bootstraps the cluster from its own address.
|
||||
- `Roles`: free-form tags Akka gossip propagates. v2 uses `admin` + `driver`; per-role wiring in `Program.cs` reads `OTOPCUA_ROLES` env var, not this list — these two should stay in sync.
|
||||
|
||||
Per-role overlay files (`appsettings.admin.json`, `appsettings.driver.json`, `appsettings.admin-driver.json`) layer on top of base `appsettings.json` based on the parsed `OTOPCUA_ROLES` (alphabetical, joined by `-`). See [ServiceHosting.md § Per-role configuration overlays](../ServiceHosting.md#per-role-configuration-overlays).
|
||||
|
||||
## IClusterRoleInfo
|
||||
|
||||
Anywhere in the host that needs the local node's identity or a view of who-else-is-in-the-cluster, inject `IClusterRoleInfo`:
|
||||
|
||||
```csharp
|
||||
public sealed class MyService(IClusterRoleInfo cluster)
|
||||
{
|
||||
public NodeId Self => cluster.LocalNode;
|
||||
public IReadOnlyList<NodeId> Drivers => cluster.MembersWithRole("driver");
|
||||
public NodeId? AdminLeader => cluster.RoleLeader("admin");
|
||||
|
||||
public MyService(IClusterRoleInfo cluster)
|
||||
{
|
||||
cluster.RoleLeaderChanged += (_, e) =>
|
||||
Console.WriteLine($"role={e.Role}: {e.PreviousLeader} → {e.NewLeader}");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`LocalNode` is `{PublicHostname}:{Port}` (the port suffix lets loopback test deployments stay distinct; production hostnames are already unique). `ConfigPublishCoordinator` uses the same `{host}:{port}` formula so the expected-ack set and the driver self-identification agree (commit `5cfbe8b`).
|
||||
|
||||
## Lifecycle
|
||||
|
||||
Akka.Hosting owns the lifecycle: `IHostedService` starts the ActorSystem at host start, runs `CoordinatedShutdown.ClusterLeavingReason` on host stop. The Cluster project does not register its own `IHostedService` (the v1 `AkkaHostedService` was deleted in commit `d6fac2d`).
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests/` covers:
|
||||
|
||||
- `HoconLoaderTests` — embedded resource loads + key settings parse correctly.
|
||||
- `RoleParserTests` — comma-split + dedup + trim semantics.
|
||||
|
||||
Cross-project integration is in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/` (cluster formation, deploy round-trip).
|
||||
@@ -0,0 +1,99 @@
|
||||
# OtOpcUa.ControlPlane
|
||||
|
||||
Five admin-role cluster singletons that drive the v2 deploy, audit, fleet, and redundancy stories. Path: `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/`.
|
||||
|
||||
## Singletons
|
||||
|
||||
| Actor | File | Marker key | Role |
|
||||
|---|---|---|---|
|
||||
| `ConfigPublishCoordinator` | `Coordinators/ConfigPublishCoordinator.cs` | `ConfigPublishCoordinatorKey` | Dispatches `DispatchDeployment`, collects `ApplyAck`s, seals/fails/times-out. |
|
||||
| `AdminOperationsActor` | `AdminOperations/AdminOperationsActor.cs` | `AdminOperationsActorKey` | Receives `StartDeployment` from the UI, snapshots ConfigDb via `ConfigComposer`, persists `Deployment` row + `ConfigEdit` marker, tells the coordinator to dispatch. |
|
||||
| `AuditWriterActor` | `Audit/AuditWriterActor.cs` | `AuditWriterActorKey` | Batched `ConfigAuditLog` writer. Flushes every 500 events or 5 s. In-buffer dedup; cross-restart dedup tracked as F3. |
|
||||
| `FleetStatusBroadcaster` | `Fleet/FleetStatusBroadcaster.cs` | `FleetStatusBroadcasterKey` | Aggregates per-node `FleetNodeStatus` heartbeats; publishes `FleetStatusChanged` on the `fleet-status` DPS topic (SignalR bridge tracked as F16). |
|
||||
| `RedundancyStateActor` | `Redundancy/RedundancyStateActor.cs` | `RedundancyStateActorKey` | Cluster-event subscriber; debounces 250 ms; publishes `RedundancyStateChanged` on the `redundancy-state` DPS topic. |
|
||||
|
||||
All five register via `WithOtOpcUaControlPlaneSingletons()` (extension on `AkkaConfigurationBuilder`). Each uses `ClusterSingletonOptions { Role = "admin" }` so the singleton runs on the admin role-leader and migrates to the next admin node on failover.
|
||||
|
||||
```csharp
|
||||
// Program.cs (admin role only)
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp);
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
```
|
||||
|
||||
Resolve from anywhere via `IRequiredActor<T>` or the `ActorRegistry`:
|
||||
|
||||
```csharp
|
||||
public sealed class AdminOperationsClient(ActorRegistry registry) : IAdminOperationsClient
|
||||
{
|
||||
private readonly IActorRef _proxy = registry.Get<AdminOperationsActorKey>();
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
## Deploy flow
|
||||
|
||||
```
|
||||
UI → IAdminOperationsClient.StartDeploymentAsync(createdBy)
|
||||
│ Ask the AdminOperationsActor singleton proxy
|
||||
▼
|
||||
AdminOperationsActor
|
||||
│ ConfigComposer.SnapshotAndFlattenAsync(db) → ConfigArtifact(blob, revHash)
|
||||
│ insert Deployment(Dispatching) + ConfigEdit marker
|
||||
│ Tell coordinator → DispatchDeployment
|
||||
▼
|
||||
ConfigPublishCoordinator
|
||||
│ DiscoverDriverNodes() → expected ACK set (host:port per member)
|
||||
│ insert NodeDeploymentState(Applying) per driver
|
||||
│ Publish DispatchDeployment on "deployments" topic
|
||||
│ Start apply-deadline timer (2 min default)
|
||||
▼ DistributedPubSub
|
||||
DriverHostActor (on each driver node — subscribed to "deployments")
|
||||
│ PreStart subscribed; current state Steady(rev)
|
||||
│ if currentRev == msg.rev → immediate ApplyAck(Applied) (idempotent)
|
||||
│ else Become(Applying) → write NodeDeploymentStatus → ApplyAck
|
||||
▼ via "deployment-acks" topic
|
||||
ConfigPublishCoordinator (subscribed to "deployment-acks" in PreStart)
|
||||
│ PersistNodeAck + collect
|
||||
│ all-Applied → Sealed
|
||||
│ any-Failed → PartiallyFailed
|
||||
│ deadline → TimedOut
|
||||
```
|
||||
|
||||
The dedicated `deployment-acks` topic + coordinator subscription was added in commit `5cfbe8b`. Before that, ACKs were published back on `deployments` and the coordinator (not subscribed) silently dropped them — deployments hung at `AwaitingApplyAcks` forever in multi-node tests.
|
||||
|
||||
### Failover recovery
|
||||
|
||||
If the admin singleton fails over mid-deploy, the new instance's `PreStart` queries `NodeDeploymentState` for any `Dispatching`/`AwaitingApplyAcks` row, rebuilds `_expectedAcks` + `_acks` from persisted state, and resumes the deadline timer. See `Coordinators/ConfigPublishCoordinator.cs::PreStart`.
|
||||
|
||||
## ConfigComposer
|
||||
|
||||
Pure function `SnapshotAndFlattenAsync(db) → ConfigArtifact(byte[], string)`:
|
||||
|
||||
1. Reads every live-edit table.
|
||||
2. Serialises to a stable byte[] (deterministic ordering).
|
||||
3. Computes SHA-256 over the bytes → 64-hex `RevisionHash`.
|
||||
|
||||
Same DB state → same artifact + same hash. That's what makes the `NoChanges` outcome work (AdminOperations compares the proposed hash to the last sealed deployment's hash).
|
||||
|
||||
## ServiceLevelCalculator
|
||||
|
||||
Pure function exposed at `Redundancy/ServiceLevelCalculator.Compute(NodeHealthInputs)`. Returns the OPC UA `ServiceLevel` byte per the truth table in [Redundancy.md](../Redundancy.md#servicelevel-tiers-part-5-65). No side effects; trivially unit-testable.
|
||||
|
||||
## DPS topics
|
||||
|
||||
| Topic | Publisher | Subscribers |
|
||||
|---|---|---|
|
||||
| `deployments` | ConfigPublishCoordinator | DriverHostActor (per-node) |
|
||||
| `deployment-acks` | DriverHostActor | ConfigPublishCoordinator |
|
||||
| `fleet-status` | FleetStatusBroadcaster | (SignalR bridge — F16) |
|
||||
| `redundancy-state` | RedundancyStateActor | (per-node ServiceLevel calc — F10) |
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/` — 29 tests covering coordinator (happy path, timeout, failover recovery), AdminOps (StartDeployment outcomes), AuditWriter (batching, dedup), FleetStatusBroadcaster (heartbeat staleness), RedundancyStateActor (debounce, snapshot), ConfigComposer (purity), ServiceLevelCalculator (truth table).
|
||||
|
||||
Multi-node tests (cross-ActorSystem) are in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/`.
|
||||
@@ -0,0 +1,126 @@
|
||||
# OtOpcUa.Runtime
|
||||
|
||||
Driver-role actor tree — one set per node. Path: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/`.
|
||||
|
||||
## Actor tree
|
||||
|
||||
```
|
||||
DriverHostActor (per node)
|
||||
│ state machine: Steady ⇄ Applying ⇄ Stale
|
||||
│
|
||||
├──▶ DriverInstanceActor (per configured DriverInstance row)
|
||||
│ state: Connecting → Connected → Reconnecting (or Stubbed)
|
||||
│
|
||||
├──▶ VirtualTagActor (per VirtualTag row)
|
||||
│ compiles + evaluates expression, publishes derived value
|
||||
│
|
||||
├──▶ ScriptedAlarmActor (per ScriptedAlarm row)
|
||||
│ state: Inactive ⇄ Active ⇄ Acknowledged
|
||||
│
|
||||
├──▶ OpcUaPublishActor (per node, pinned dispatcher)
|
||||
│ marshalled OPC UA SDK writes + RebuildAddressSpace
|
||||
│
|
||||
├──▶ HistorianAdapterActor (per node)
|
||||
│ pipe IPC to Wonderware historian sidecar
|
||||
│
|
||||
├──▶ PeerOpcUaProbeActor (per peer node)
|
||||
│ opc.tcp ping → redundancy-state DPS topic
|
||||
│
|
||||
└──▶ DbHealthProbeActor (per node)
|
||||
cached SELECT 1; consumed by /health/ready + redundancy calc
|
||||
```
|
||||
|
||||
## Public surface
|
||||
|
||||
| Type | File |
|
||||
|---|---|
|
||||
| `WithOtOpcUaRuntimeActors()` | `ServiceCollectionExtensions.cs` — extension on `AkkaConfigurationBuilder`. Spawns `DriverHostActor` + `DbHealthProbeActor` on the host's ActorSystem. |
|
||||
| `DriverHostActor` | `Drivers/DriverHostActor.cs` |
|
||||
| `DriverInstanceActor` | `Drivers/DriverInstanceActor.cs` |
|
||||
| `VirtualTagActor` | `VirtualTags/VirtualTagActor.cs` |
|
||||
| `ScriptedAlarmActor` | `ScriptedAlarms/ScriptedAlarmActor.cs` |
|
||||
| `OpcUaPublishActor` | `OpcUa/OpcUaPublishActor.cs` |
|
||||
| `HistorianAdapterActor` | `Historian/HistorianAdapterActor.cs` |
|
||||
| `PeerOpcUaProbeActor` | `Health/PeerOpcUaProbeActor.cs` |
|
||||
| `DbHealthProbeActor` | `Health/DbHealthProbeActor.cs` |
|
||||
|
||||
Marker keys for registry lookup: `DriverHostActorKey`, `DbHealthProbeActorKey`.
|
||||
|
||||
## DriverHostActor
|
||||
|
||||
Per-node supervisor with three Become states:
|
||||
|
||||
| State | Meaning |
|
||||
|---|---|
|
||||
| `Steady(rev)` | Caught up. `DispatchDeployment` with `msg.rev == currentRev` → immediate `ApplyAck(Applied)` (idempotent). New rev → `Become(Applying)`. |
|
||||
| `Applying(id)` | Apply in progress. Further `DispatchDeployment` for in-flight ID → debug-log + ignore. For new ID → defer via `Self.Forward`. |
|
||||
| `Stale` | ConfigDb unreachable on bootstrap. Periodic `RetryConfigDbConnection` tries to advance to `Steady`. |
|
||||
|
||||
`PreStart`:
|
||||
|
||||
1. Subscribe to `deployments` DPS topic.
|
||||
2. Read most-recent `NodeDeploymentState` for this node from ConfigDb.
|
||||
3. If `Applied` → restore `_currentRevision`, `Become(Steady)`.
|
||||
4. If `Applying` (orphan from crash) → replay apply (idempotent).
|
||||
5. If `Failed` → `Become(Steady)` at last known rev.
|
||||
6. DB unreachable → `Become(Stale)`, start retry timer.
|
||||
|
||||
ACK publishing: when no `_coordinatorOverride` is set (production), `SendAck` publishes on the dedicated `deployment-acks` DPS topic which the coordinator subscribes to (commit `5cfbe8b`).
|
||||
|
||||
## DriverInstanceActor
|
||||
|
||||
Per-driver-instance child. State machine:
|
||||
|
||||
- `Connecting` → first attempt to reach the underlying driver
|
||||
- `Connected` → subscriptions active, reads/writes flow
|
||||
- `Reconnecting` → temporary disconnect; backoff retry
|
||||
- `Stubbed` → DEV-STUB mode for Windows-only drivers (Galaxy, Wonderware Historian) on non-Windows or when `roles` contains `dev`
|
||||
|
||||
`ShouldStub(driverType, roles)` returns `true` for `"Galaxy" | "Historian.Wonderware"` on non-Windows; the actor goes straight to `Stubbed` and returns deterministic success without touching real hardware. Wiring this into the DriverHost child-spawn path is follow-up F20 (folds into F7).
|
||||
|
||||
Engine wiring (subscription publishing, ApplyDelta diff, bad-quality-on-disconnect, write path, supervisor backoff) is stubbed — tracked as F7. Tests exercise message contracts, not engine behaviour.
|
||||
|
||||
## VirtualTagActor / ScriptedAlarmActor
|
||||
|
||||
Skeleton state machines + message handlers. Engine work:
|
||||
|
||||
- `VirtualTagEngine.Evaluate()` not yet called from `VirtualTagActor.DependencyValueChanged` (F8).
|
||||
- `AlarmConditionService` not yet called from `ScriptedAlarmActor` (F9).
|
||||
- `ScriptedAlarmState` DB persistence on `PreRestart` not wired (F9).
|
||||
|
||||
## OpcUaPublishActor
|
||||
|
||||
The only actor on the **pinned dispatcher** (`opcua-synchronized-dispatcher` from `akka.conf`). All OPC UA SDK address-space writes go through it so the SDK's threading model isn't violated.
|
||||
|
||||
Message contracts are defined; actual SDK calls are stubbed (counters only). Real address-space writes + `ServiceLevel` Variable updates + `RebuildAddressSpace` after a deploy land in F10 (gated on F13 — full `OpcUaApplicationHost` extraction).
|
||||
|
||||
## HistorianAdapterActor, PeerOpcUaProbeActor
|
||||
|
||||
Both have message contracts wired. Engine integration deferred:
|
||||
|
||||
- `HistorianAdapterActor` — named-pipe IPC to the Wonderware historian sidecar + `SqliteStoreAndForwardSink` (F11).
|
||||
- `PeerOpcUaProbeActor` — real `opc.tcp://peer:4840` ping (F12). Current stub always returns `Ok=true`.
|
||||
|
||||
## DbHealthProbeActor
|
||||
|
||||
`Ask<DbHealthStatus>` returns cached state (refreshed every 5 s by an internal `SELECT 1`). Consumed by `/health/ready` and `RedundancyStateActor`.
|
||||
|
||||
## Lifecycle wiring
|
||||
|
||||
```csharp
|
||||
// Program.cs (driver role only)
|
||||
builder.Services.AddAkka("otopcua", (ab, sp) =>
|
||||
{
|
||||
ab.WithOtOpcUaClusterBootstrap(sp);
|
||||
if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
});
|
||||
```
|
||||
|
||||
`WithOtOpcUaRuntimeActors` resolves `IDbContextFactory<OtOpcUaConfigDbContext>` + `IClusterRoleInfo` from DI, then spawns `DbHealthProbeActor` and `DriverHostActor` as top-level `/user/` actors. Both register marker keys in `ActorRegistry` so the registry lookup works from anywhere.
|
||||
|
||||
## Tests
|
||||
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/` — 16 tests covering DriverHostActor (Steady ack, Applying transitions, Stale recovery), DriverInstanceActor (state machine, stub mode), VirtualTagActor + ScriptedAlarmActor (message contracts), OpcUaPublishActor (props + message acceptance), DbHealthProbe + PeerOpcUaProbe (probe loop), and the `WithOtOpcUaRuntimeActors` registration round-trip.
|
||||
|
||||
End-to-end deploy from admin → driver via the cluster is in `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/DeployHappyPathTests.cs`.
|
||||
+3
-3
@@ -36,7 +36,7 @@ Mirror ScadaLink's layout exactly:
|
||||
|
||||
```
|
||||
src/
|
||||
ZB.MOM.WW.OtOpcUa.Admin/ # Razor Components project (.NET 10)
|
||||
ZB.MOM.WW.OtOpcUa.AdminUI/ # Razor Components project (.NET 10)
|
||||
Auth/
|
||||
AuthEndpoints.cs # /auth/login, /auth/logout, /auth/token
|
||||
CookieAuthenticationStateProvider.cs # bridges cookie auth to Blazor <AuthorizeView>
|
||||
@@ -61,10 +61,10 @@ src/
|
||||
NotAuthorizedView.razor
|
||||
EndpointExtensions.cs # MapAuthEndpoints + role policies
|
||||
ServiceCollectionExtensions.cs # AddCentralAdmin
|
||||
ZB.MOM.WW.OtOpcUa.Admin.Security/ # LDAP + role mapping + JWT (sibling of ScadaLink.Security)
|
||||
ZB.MOM.WW.OtOpcUa.Security/ # LDAP + role mapping + JWT (sibling of ScadaLink.Security)
|
||||
```
|
||||
|
||||
The `Admin.Security` project carries `LdapAuthService`, `RoleMapper`, `JwtTokenService`, `AuthorizationPolicies`. If it ever makes sense to consolidate with ScadaLink's identical project, lift to a shared internal NuGet — out of scope for v2.0 to keep OtOpcUa decoupled from ScadaLink's release cycle.
|
||||
The `Security` project carries `LdapAuthService`, `RoleMapper`, `JwtTokenService`, `AuthorizationPolicies`. If it ever makes sense to consolidate with ScadaLink's identical project, lift to a shared internal NuGet — out of scope for v2.0 to keep OtOpcUa decoupled from ScadaLink's release cycle.
|
||||
|
||||
## Authentication & Authorization
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ Shipped as PR #183 (12 tests in configuration; 13 more in Admin.Tests).
|
||||
| F.4 — Test harness (modal, synthetic inputs, output + logger display) | **Partial** | `ScriptTestHarnessService.cs` is complete and tested. `ScriptsTab.razor` calls `Harness.RunVirtualTagAsync` with zero-value synthetic inputs derived from the extractor. A full interactive input-form modal was not shipped — the harness zeroes all inputs automatically rather than prompting the operator per-tag. |
|
||||
| F.5 — Script log viewer (SignalR tail of `scripts-*.log` filtered by `ScriptName`, load-more) | **Not started** | No SignalR stream of the scripts log is wired in the Admin UI. The `AlertHub` / `FleetStatusHub` exist but there is no `ScriptLogHub`. |
|
||||
| F.6 — `/alarms/historian` diagnostics view | **Done** | `AlarmsHistorian.razor` + `HistorianDiagnosticsService.cs` |
|
||||
| F.7 — Playwright smoke (author calc tag, verify in equipment tree; author alarm, verify in `AlarmsAndConditions`) | **Not started** | `tests/Server/ZB.MOM.WW.OtOpcUa.Admin.E2ETests/` exists but its `UnsTabDragDropE2ETests.cs` is the only Playwright test; no Phase 7 Admin UI playwright scenario. |
|
||||
| F.7 — Playwright smoke (author calc tag, verify in equipment tree; author alarm, verify in `AlarmsAndConditions`) | **Not started** | No Phase 7 Playwright/E2E project exists in the repo today; future-work item without an assigned path. |
|
||||
|
||||
Shipped as PR #185 (13 Admin service tests; UI completeness is partial — see gaps section).
|
||||
|
||||
@@ -190,8 +190,8 @@ The SignalR tail of `scripts-*.log` filtered by `ScriptName` was not implemented
|
||||
| `Core.VirtualTags` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.VirtualTags/` |
|
||||
| `Core.ScriptedAlarms` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms/` |
|
||||
| `Core.AlarmHistorian` sources | `src/Core/ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian/` |
|
||||
| Server Phase7 composition | `src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/` |
|
||||
| Admin services | `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Services/Script*.cs`, `VirtualTagService.cs`, `HistorianDiagnosticsService.cs` |
|
||||
| Admin UI pages | `src/Server/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Clusters/ScriptsTab.razor`, `AlarmsHistorian.razor` |
|
||||
| Server Phase7 composition | `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/Phase7Composer.cs`, `Phase7Applier.cs`, `Phase7Plan.cs` |
|
||||
| Admin services (CRUD writes) | `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs` (actor-driven); live state in `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/ScriptedAlarms/ScriptedAlarmActor.cs`, `Runtime/VirtualTags/VirtualTagActor.cs`; Roslyn engines in `src/Server/ZB.MOM.WW.OtOpcUa.Host/Engines/` — v1 `Admin/Services/Script*.cs`, `VirtualTagService.cs`, `HistorianDiagnosticsService.cs` deleted |
|
||||
| Admin UI pages | `src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Components/Pages/Scripts.razor`, `ScriptEdit.razor`, `ScriptedAlarms.razor`, `ScriptedAlarmEdit.razor`, `AlarmsHistorian.razor`, `VirtualTags.razor`, `VirtualTagEdit.razor` |
|
||||
| Historian sidecar writer | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs` |
|
||||
| EF migrations | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260420231641_AddPhase7ScriptingTables.cs`, `20260420232000_ExtendComputeGenerationDiffWithPhase7.cs` |
|
||||
|
||||
@@ -55,6 +55,7 @@ Each row is one manual run; pass criterion in the right column.
|
||||
| A2 | ServiceLevel updates on peer down | Connect to Primary. Stop Backup (`sc stop OtOpcUa`). Watch `ServiceLevel`. | Transitions 200 → 150 within ~2 s of peer probe timeout |
|
||||
| A3 | RedundancySupport | Browse to `Server.ServerRedundancy.RedundancySupport`. | Value matches the declared `RedundancyMode` (Warm / Hot / None) |
|
||||
| A4 | ServerUriArray (non-transparent upgrade) | Requires a redundancy-object-type upgrade follow-up. | When upgrade lands: `ServerUriArray` reports both ApplicationUris, self first |
|
||||
| A4b | Peer URI visibility via `Server.ServerArray` (i=2254) | Configure each `OpcUaApplicationHost` with the partner's `ApplicationUri` via `OpcUaApplicationHostOptions.PeerApplicationUris`. From any client, Read NodeId `i=2254` (`Server.ServerArray`). | Returned `String[]` includes both self + peer `ApplicationUri`s. Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` (loopback dual-host with real OPCFoundation client `Session` read). |
|
||||
| A5 | Mid-apply dip | On Primary trigger a `sp_PublishGeneration` apply. | `ServiceLevel` drops to 75 for the apply duration + dwell |
|
||||
|
||||
### Block B — Client failover
|
||||
@@ -101,7 +102,9 @@ flips A4 from "deferred" to "expected pass").
|
||||
- **A4 pending**: `Server.ServerRedundancy` on our current SDK build lands as
|
||||
the base `ServerRedundancyState`, which has no `ServerUriArray` child.
|
||||
`ServerRedundancyNodeWriter.ApplyServerUriArray` logs-and-skips until the
|
||||
redundancy-object-type upgrade follow-up lands.
|
||||
redundancy-object-type upgrade follow-up lands. Cross-reference **A4b** —
|
||||
peer URIs are visible today via `Server.ServerArray` (i=2254) populated by
|
||||
`OpcUaApplicationHost.PopulateServerArray`.
|
||||
- **Recovery dwell default**: `RecoveryStateManager.DwellTime` defaults to 60 s
|
||||
in `Program.cs`. Adjust via future config knob if B3 takes too long to
|
||||
observe.
|
||||
@@ -121,8 +124,8 @@ flips A4 from "deferred" to "expected pass").
|
||||
redundancy implementations we don't control.
|
||||
- For the sub-set of scenarios that *can* be automated — the self-loopback
|
||||
case where our own `otopcua-cli` drives Primary + Backup — the existing
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/RedundancyStatePublisherTests` +
|
||||
`ServiceLevelCalculatorTests` (unit) + `ClusterTopologyLoaderTests`
|
||||
(integration) already cover the math + data path. The wire-level assertion
|
||||
that the values actually land on the right OPC UA nodes is covered by
|
||||
`ServerRedundancyNodeWriterTests`.
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/RedundancyStateActorTests` +
|
||||
`ServiceLevelCalculatorTests` (unit) already cover the math + data path.
|
||||
The wire-level assertion that the peer URIs actually land on the
|
||||
`Server.ServerArray` node (i=2254) is covered by `DualEndpointTests` in
|
||||
`tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/`.
|
||||
|
||||
@@ -57,7 +57,7 @@ Remaining follow-ups (hardening):
|
||||
Remaining Phase 6.3 surfaces (hardening, not release-blocking):
|
||||
|
||||
- ~~`PeerHttpProbeLoop` + `PeerUaProbeLoop` HostedServices populating `PeerReachabilityTracker` on each tick.~~ **Closed 2026-04-24.** Two-layer probe model shipped: HTTP probe at 2 s / 1 s timeout against `/healthz`; OPC UA probe at 10 s / 5 s timeout via `DiscoveryClient.GetEndpoints`, short-circuiting when HTTP reports the peer unhealthy. Registered on the Server as `AddHostedService<PeerHttpProbeLoop>` + `AddHostedService<PeerUaProbeLoop>`. Publisher now sees accurate `PeerReachability` per peer instead of degrading to `Unknown` → Isolated-Primary band (230).
|
||||
- OPC UA variable-node wiring: bind `ServiceLevel` Byte + `ServerUriArray` String[] to the publisher's events via `BaseDataVariable.OnReadValue` / direct value push.
|
||||
- ~~OPC UA variable-node wiring: bind `ServiceLevel` Byte + `ServerUriArray` String[] to the publisher's events via `BaseDataVariable.OnReadValue` / direct value push.~~ **Closed 2026-05-26.** `ServiceLevel` byte binding closed earlier under Path D. Peer-URI half closed via `OpcUaApplicationHost.PopulateServerArray` — populates self + each `PeerApplicationUris` entry into the SDK `IServerInternal.ServerUris` `StringTable`; clients read `Server.ServerArray` (NodeId `i=2254`). Validated by `DualEndpointTests` in `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/`. `ServerUriArray` proper (the redundancy-object-type child) remains deferred pending object-type upgrade.
|
||||
- ~~`sp_PublishGeneration` wraps its apply in `await using var lease = coordinator.BeginApplyLease(...)` so the `PrimaryMidApply` band (200) fires during actual publishes (task #148 part 2).~~ **Closed 2026-04-24.** The apply loop now lives in `GenerationRefreshHostedService` — polls `sp_GetCurrentGenerationForCluster` every 5s, opens a lease when a new generation is detected, calls `RedundancyCoordinator.RefreshAsync` inside the `await using`, releases the lease on all exit paths. Replaces the previous "topology never refreshes without a process restart" behaviour.
|
||||
- Client interop matrix — Ignition / Kepware / Aveva OI Gateway (Stream F, task #150). Manual + doc-only.
|
||||
|
||||
@@ -118,6 +118,7 @@ v2 GA requires all of the following:
|
||||
|
||||
## Change log
|
||||
|
||||
- **2026-05-26** — Gap-closeout pass. `OpcUaApplicationHost.PopulateServerArray` populates `Server.ServerArray` (NodeId `i=2254`) with self + `OpcUaApplicationHostOptions.PeerApplicationUris`, giving non-transparent peer URI visibility through the standard discovery surface. New `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests/` IT project (`DualEndpointTests`) validates with two real `OpcUaApplicationHost` instances on loopback + a live OPCFoundation client `Session` read. CI `v2-ci.yml` `integration:` job converted to a matrix across `Host.IntegrationTests` + `OpcUaServer.IntegrationTests`. Per-role appsettings overlays shipped (`appsettings.admin.json` / `appsettings.driver.json` / `appsettings.admin-driver.json`) — `Program.cs:33-35` loads by alphabetical-joined role suffix. `FailoverScenarioTests` → `FailoverDuringDeployTests` rename. Stale empty `src/Server/{Server,Admin}` + `tests/Server/{Server.Tests,Admin.Tests,Admin.E2ETests}` directories deleted (no source, absent from `.slnx`).
|
||||
- **2026-04-24** — Phase 5 driver complement closed (task #120 CLOSED). AB CIP, AB Legacy, TwinCAT, FOCAS all shipped. FOCAS migration: retired the Tier-C split (`Driver.FOCAS.Host` + `Driver.FOCAS.Shared` + `FwlibNative` + shim DLL deleted) in favour of a pure-managed in-process `FocasWireClient` inlined into `Driver.FOCAS`; driver is now read-only against the CNC by design. Integration test matrix grew to cover Browse / Subscribe / IAlarmSource / Probe end-to-end.
|
||||
- **2026-04-23** — Phase 6.4 audit close-out. IdentificationFolderBuilder + OPC 40010 Identification folder verified against the shipped code.
|
||||
- **2026-04-20** — Phase 7 plan drafted (`phase-7-scripting-and-alarming.md`, `phase-7-e2e-smoke.md`). Out of scope for v2 GA.
|
||||
|
||||
@@ -1,46 +1,63 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Registers the v2 Windows services on a node: OtOpcUa (main server, net10) and
|
||||
optionally OtOpcUaWonderwareHistorian (Wonderware historian sidecar).
|
||||
Registers the v2 Windows service on a node: OtOpcUaHost (fused binary, .NET 10)
|
||||
and optionally OtOpcUaWonderwareHistorian (Wonderware historian sidecar, net48 x86).
|
||||
|
||||
.DESCRIPTION
|
||||
PR 7.2 retired the legacy out-of-process OtOpcUaGalaxyHost service alongside the
|
||||
GalaxyProxyDriver / GalaxyHost / GalaxyShared projects. Galaxy access now flows
|
||||
through the in-process GalaxyDriver talking gRPC to a separately-installed
|
||||
mxaccessgw. The mxaccessgw server runs out of its own repo
|
||||
(`c:\Users\dohertj2\Desktop\mxaccessgw\`) — see
|
||||
`docs/v2/Galaxy.ParityRig.md` for the gw setup recipe.
|
||||
v2 consolidates the legacy OtOpcUa + OtOpcUaAdmin services into a single role-gated
|
||||
OtOpcUaHost binary. The -Roles parameter sets the OTOPCUA_ROLES service env so
|
||||
Program.cs decides what to mount (admin / driver / both). The Wonderware historian
|
||||
sidecar logic is unchanged from v1; install it with -InstallWonderwareHistorian.
|
||||
|
||||
Galaxy access flows through the mxaccessgw sibling repo (separate service); see
|
||||
docs/v2/Galaxy.ParityRig.md for the gateway setup.
|
||||
|
||||
.PARAMETER InstallRoot
|
||||
Where the binaries live (typically C:\Program Files\OtOpcUa).
|
||||
Where the binaries live (typically C:\Program Files\OtOpcUa). The OtOpcUaHost
|
||||
service runs OtOpcUa.Host.exe from this directory; publish the Host project there
|
||||
with `dotnet publish -c Release -r win-x64 --self-contained` first.
|
||||
|
||||
.PARAMETER ServiceAccount
|
||||
Service account SID or DOMAIN\name. The OtOpcUa service runs under this account.
|
||||
Service account SID or DOMAIN\name. The OtOpcUaHost service runs under this account.
|
||||
|
||||
.PARAMETER Roles
|
||||
Comma-separated cluster roles for this node. One of:
|
||||
- "admin,driver" — single-node dev or all-in-one production node
|
||||
- "admin" — admin-only HA pair member (Blazor + control-plane singletons)
|
||||
- "driver" — driver-only node (OPC UA endpoint + per-node actors)
|
||||
Written to the service env as OTOPCUA_ROLES.
|
||||
|
||||
.PARAMETER HttpPort
|
||||
HTTP port for the AdminUI + auth endpoints. Default 9000. Written as ASPNETCORE_URLS.
|
||||
Ignored on driver-only nodes (no Blazor surface).
|
||||
|
||||
.PARAMETER InstallWonderwareHistorian
|
||||
Gate the OtOpcUaWonderwareHistorian sidecar install. Off by default; set when
|
||||
the deployment uses the Wonderware historian for history reads + alarm-event
|
||||
persistence.
|
||||
Gate the OtOpcUaWonderwareHistorian sidecar install. Off by default; set when the
|
||||
deployment uses the Wonderware historian for history reads + alarm-event persistence.
|
||||
|
||||
.PARAMETER HistorianSharedSecret
|
||||
Per-process secret passed to the Historian sidecar via env var. Generated
|
||||
freshly per install when not supplied.
|
||||
Per-process secret passed to the historian sidecar via env var. Generated freshly
|
||||
per install when not supplied.
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' -ServiceAccount 'OTOPCUA\svc-otopcua'
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' `
|
||||
-ServiceAccount 'OTOPCUA\svc-otopcua' -Roles 'admin,driver'
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' -ServiceAccount 'OTOPCUA\svc-otopcua' `
|
||||
.\Install-Services.ps1 -InstallRoot 'C:\Program Files\OtOpcUa' `
|
||||
-ServiceAccount 'OTOPCUA\svc-otopcua' -Roles 'driver' `
|
||||
-InstallWonderwareHistorian
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[Parameter(Mandatory)] [string]$InstallRoot,
|
||||
[Parameter(Mandatory)] [string]$ServiceAccount,
|
||||
[Parameter(Mandatory)] [ValidateSet('admin', 'driver', 'admin,driver', 'driver,admin')]
|
||||
[string]$Roles,
|
||||
[int]$HttpPort = 9000,
|
||||
|
||||
# PR 3.W — Wonderware historian sidecar. Optional; gates the
|
||||
# OtOpcUaWonderwareHistorian service. Secret + pipe defaults match the server's
|
||||
# Historian:Wonderware appsettings block.
|
||||
# Wonderware historian sidecar. Optional; gates the OtOpcUaWonderwareHistorian
|
||||
# service. Secret + pipe defaults match the server's Historian:Wonderware appsettings.
|
||||
[switch]$InstallWonderwareHistorian,
|
||||
[string]$HistorianSharedSecret,
|
||||
[string]$HistorianPipeName = 'OtOpcUaWonderwareHistorian',
|
||||
@@ -51,18 +68,19 @@ param(
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Test-Path "$InstallRoot\OtOpcUa.Server.exe")) {
|
||||
Write-Error "OtOpcUa.Server.exe not found at $InstallRoot — copy the publish output first"
|
||||
if (-not (Test-Path "$InstallRoot\OtOpcUa.Host.exe")) {
|
||||
Write-Error "OtOpcUa.Host.exe not found at $InstallRoot — copy the publish output first"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Generate fresh shared secrets per install if not supplied.
|
||||
function New-SharedSecret {
|
||||
$bytes = New-Object byte[] 32
|
||||
[System.Security.Cryptography.RandomNumberGenerator]::Create().GetBytes($bytes)
|
||||
return [Convert]::ToBase64String($bytes)
|
||||
}
|
||||
if ($InstallWonderwareHistorian -and -not $HistorianSharedSecret) { $HistorianSharedSecret = New-SharedSecret }
|
||||
if ($InstallWonderwareHistorian -and -not $HistorianSharedSecret) {
|
||||
$HistorianSharedSecret = New-SharedSecret
|
||||
}
|
||||
|
||||
if ($InstallWonderwareHistorian -and -not (Test-Path "$InstallRoot\WonderwareHistorian\OtOpcUa.Driver.Historian.Wonderware.exe")) {
|
||||
Write-Error "OtOpcUa.Driver.Historian.Wonderware.exe not found at $InstallRoot\WonderwareHistorian — copy the publish output first"
|
||||
@@ -76,10 +94,7 @@ $sid = if ($ServiceAccount.StartsWith('S-1-')) {
|
||||
(New-Object System.Security.Principal.NTAccount $ServiceAccount).Translate([System.Security.Principal.SecurityIdentifier]).Value
|
||||
}
|
||||
|
||||
# --- Install OtOpcUaWonderwareHistorian (PR 3.W) — separate sidecar that exposes the
|
||||
# Wonderware Historian SDK via a named-pipe protocol consumed by the .NET 10 server.
|
||||
# Optional: only installed when -InstallWonderwareHistorian is supplied. Depends on the
|
||||
# hard AVEVA services that host the historian SDK runtime path.
|
||||
# --- OtOpcUaWonderwareHistorian sidecar (optional, unchanged from v1) -------
|
||||
$historianDepend = $null
|
||||
if ($InstallWonderwareHistorian) {
|
||||
$historianEnv = @(
|
||||
@@ -87,14 +102,10 @@ if ($InstallWonderwareHistorian) {
|
||||
"OTOPCUA_ALLOWED_SID=$sid"
|
||||
"OTOPCUA_HISTORIAN_SECRET=$HistorianSharedSecret"
|
||||
"OTOPCUA_HISTORIAN_ENABLED=true"
|
||||
# Default-on when the historian sidecar is installed; flip to false for a
|
||||
# read-only deployment that still loads aahClientManaged for reads but
|
||||
# rejects WriteAlarmEvents frames.
|
||||
"OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED=true"
|
||||
"OTOPCUA_HISTORIAN_SERVER=$HistorianServer"
|
||||
"OTOPCUA_HISTORIAN_PORT=$HistorianPort"
|
||||
) -join "`0"
|
||||
$historianEnv += "`0`0"
|
||||
)
|
||||
|
||||
Write-Host "Installing OtOpcUaWonderwareHistorian..."
|
||||
& sc.exe create OtOpcUaWonderwareHistorian binPath= "`"$InstallRoot\WonderwareHistorian\OtOpcUa.Driver.Historian.Wonderware.exe`"" `
|
||||
@@ -105,36 +116,59 @@ if ($InstallWonderwareHistorian) {
|
||||
& sc.exe config OtOpcUaWonderwareHistorian start= delayed-auto | Out-Null
|
||||
|
||||
$svcKey = "HKLM:\SYSTEM\CurrentControlSet\Services\OtOpcUaWonderwareHistorian"
|
||||
$envValue = $historianEnv.Split("`0") | Where-Object { $_ -ne '' }
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $envValue
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $historianEnv
|
||||
|
||||
& sc.exe failure OtOpcUaWonderwareHistorian reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
$historianDepend = 'OtOpcUaWonderwareHistorian'
|
||||
}
|
||||
|
||||
# --- Install OtOpcUa. Galaxy access flows through GalaxyDriver → mxaccessgw (gRPC),
|
||||
# so OtOpcUa no longer depends on a sibling service for Galaxy connectivity. The
|
||||
# mxaccessgw is installed separately. When the Wonderware sidecar is installed,
|
||||
# depend on it for startup ordering.
|
||||
$otOpcUaDepends = @()
|
||||
if ($historianDepend) { $otOpcUaDepends += $historianDepend }
|
||||
# --- OtOpcUaHost (the fused v2 binary) --------------------------------------
|
||||
$normalisedRoles = ($Roles -split ',' | ForEach-Object { $_.Trim() } | Sort-Object -Unique) -join ','
|
||||
|
||||
Write-Host "Installing OtOpcUa..."
|
||||
$hasAdmin = $normalisedRoles -split ',' -contains 'admin'
|
||||
|
||||
$hostEnv = @(
|
||||
"OTOPCUA_ROLES=$normalisedRoles",
|
||||
'DOTNET_ENVIRONMENT=Production'
|
||||
)
|
||||
if ($hasAdmin) {
|
||||
$hostEnv += "ASPNETCORE_URLS=http://+:$HttpPort"
|
||||
}
|
||||
|
||||
$hostDepends = @()
|
||||
if ($historianDepend) { $hostDepends += $historianDepend }
|
||||
|
||||
Write-Host "Installing OtOpcUaHost (roles=$normalisedRoles)..."
|
||||
$createArgs = @(
|
||||
'create', 'OtOpcUa',
|
||||
'binPath=', "`"$InstallRoot\OtOpcUa.Server.exe`"",
|
||||
'DisplayName=', 'OtOpcUa Server',
|
||||
'create', 'OtOpcUaHost',
|
||||
'binPath=', "`"$InstallRoot\OtOpcUa.Host.exe`"",
|
||||
'DisplayName=', "OtOpcUa Host ($normalisedRoles)",
|
||||
'start=', 'auto',
|
||||
'obj=', $ServiceAccount
|
||||
)
|
||||
if ($otOpcUaDepends.Count -gt 0) {
|
||||
$createArgs += @('depend=', ($otOpcUaDepends -join '/'))
|
||||
if ($hostDepends.Count -gt 0) {
|
||||
$createArgs += @('depend=', ($hostDepends -join '/'))
|
||||
}
|
||||
& sc.exe @createArgs | Out-Null
|
||||
|
||||
# Env block via registry MultiString (sc.exe doesn't take env directly).
|
||||
$svcKey = "HKLM:\SYSTEM\CurrentControlSet\Services\OtOpcUaHost"
|
||||
Set-ItemProperty -Path $svcKey -Name 'Environment' -Type MultiString -Value $hostEnv
|
||||
|
||||
# Restart-on-failure: 5s, 30s, 60s; reset counter after a clean 24h run.
|
||||
& sc.exe failure OtOpcUaHost reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Installed. Start with:"
|
||||
Write-Host "Installed OtOpcUaHost:"
|
||||
Write-Host " Roles: $normalisedRoles"
|
||||
if ($hasAdmin) { Write-Host " HTTP port: $HttpPort" }
|
||||
Write-Host " Binary: $InstallRoot\OtOpcUa.Host.exe"
|
||||
Write-Host " Account: $ServiceAccount"
|
||||
Write-Host ""
|
||||
Write-Host "Start with:"
|
||||
if ($InstallWonderwareHistorian) { Write-Host " sc.exe start OtOpcUaWonderwareHistorian" }
|
||||
Write-Host " sc.exe start OtOpcUa"
|
||||
Write-Host " sc.exe start OtOpcUaHost"
|
||||
if ($InstallWonderwareHistorian) {
|
||||
Write-Host ""
|
||||
Write-Host "Wonderware historian shared secret (configure into appsettings.json Historian:Wonderware:SharedSecret):"
|
||||
@@ -142,5 +176,5 @@ if ($InstallWonderwareHistorian) {
|
||||
}
|
||||
Write-Host ""
|
||||
Write-Host "NOTE: Galaxy access flows through mxaccessgw — install + run that separately"
|
||||
Write-Host " per docs/v2/Galaxy.ParityRig.md. OtOpcUa connects via the Galaxy.Gateway"
|
||||
Write-Host " section of appsettings.json (default endpoint http://localhost:5120)."
|
||||
Write-Host " per docs/v2/Galaxy.ParityRig.md. OtOpcUaHost connects via the"
|
||||
Write-Host " Galaxy.Gateway section of appsettings.json (default http://localhost:5120)."
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Installs Traefik as a Windows service that routes admin HTTP traffic to whichever
|
||||
OtOpcUa.Host node holds the admin role-leader (via /health/active).
|
||||
|
||||
.DESCRIPTION
|
||||
Downloads the Traefik Windows binary into $InstallRoot, drops traefik.yml +
|
||||
traefik-dynamic.yml from this directory next to it, and registers Traefik as a
|
||||
Windows service via sc.exe with restart-on-failure.
|
||||
|
||||
Companion to Install-Services.ps1. Run on the box that fronts the admin HTTP
|
||||
traffic (typically a separate node from OtOpcUaHost, or co-located on the
|
||||
primary admin node).
|
||||
|
||||
.PARAMETER InstallRoot
|
||||
Where the Traefik binary + config land. Default 'C:\Program Files\Traefik'.
|
||||
|
||||
.PARAMETER TraefikVersion
|
||||
Traefik version to download. Default 'v3.1.6'.
|
||||
|
||||
.EXAMPLE
|
||||
.\Install-Traefik.ps1 -InstallRoot 'C:\Program Files\Traefik'
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[string]$InstallRoot = 'C:\Program Files\Traefik',
|
||||
[string]$TraefikVersion = 'v3.1.6'
|
||||
)
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Test-Path $InstallRoot)) {
|
||||
New-Item -ItemType Directory -Path $InstallRoot | Out-Null
|
||||
}
|
||||
|
||||
$zip = Join-Path $env:TEMP "traefik-$TraefikVersion.zip"
|
||||
$url = "https://github.com/traefik/traefik/releases/download/$TraefikVersion/traefik_${TraefikVersion}_windows_amd64.zip"
|
||||
|
||||
Write-Host "Downloading Traefik $TraefikVersion..."
|
||||
Invoke-WebRequest -Uri $url -OutFile $zip
|
||||
Expand-Archive -Path $zip -DestinationPath $InstallRoot -Force
|
||||
Remove-Item $zip
|
||||
|
||||
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
|
||||
Copy-Item -Force (Join-Path $scriptDir 'traefik.yml') $InstallRoot
|
||||
Copy-Item -Force (Join-Path $scriptDir 'traefik-dynamic.yml') (Join-Path $InstallRoot 'dynamic.yml')
|
||||
|
||||
# Traefik reads dynamic.yml from /etc/traefik on Linux; on Windows place it next to the
|
||||
# binary and point the file provider at it. Edit traefik.yml's `filename:` if you want
|
||||
# to change the location.
|
||||
(Get-Content -Raw (Join-Path $InstallRoot 'traefik.yml')) `
|
||||
-replace '/etc/traefik/dynamic.yml', (Join-Path $InstallRoot 'dynamic.yml').Replace('\', '/') `
|
||||
| Set-Content (Join-Path $InstallRoot 'traefik.yml')
|
||||
|
||||
Write-Host "Installing Traefik Windows service..."
|
||||
& sc.exe create OtOpcUaTraefik binPath= "`"$InstallRoot\traefik.exe`" --configFile=`"$InstallRoot\traefik.yml`"" `
|
||||
DisplayName= 'OtOpcUa Traefik (admin HTTP front door)' `
|
||||
start= auto | Out-Null
|
||||
|
||||
& sc.exe failure OtOpcUaTraefik reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Installed OtOpcUaTraefik. Edit:"
|
||||
Write-Host " $InstallRoot\dynamic.yml (router + service definitions)"
|
||||
Write-Host "Start with:"
|
||||
Write-Host " sc.exe start OtOpcUaTraefik"
|
||||
Write-Host ""
|
||||
Write-Host "Traefik dashboard: http://localhost:8080 (turn off api.insecure in production)"
|
||||
@@ -43,11 +43,11 @@ function Test-NssmService([string]$Name) {
|
||||
# Step 1: Stop in reverse dependency order
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Stopping services (OtOpcUa → OtOpcUaWonderwareHistorian → MxAccessGw)"
|
||||
Step "Stopping services (OtOpcUaHost > OtOpcUaWonderwareHistorian > MxAccessGw)"
|
||||
|
||||
foreach ($name in @('OtOpcUa', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
foreach ($name in @('OtOpcUaHost', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
if (Test-NssmService $name) {
|
||||
Run { nssm stop $name } "stop $name"
|
||||
Run { Stop-Service $name -Force -ErrorAction SilentlyContinue } "stop $name"
|
||||
}
|
||||
else {
|
||||
Write-Host " ($name not installed; skipping)" -ForegroundColor DarkGray
|
||||
@@ -56,7 +56,7 @@ foreach ($name in @('OtOpcUa', 'OtOpcUaWonderwareHistorian', 'MxAccessGw')) {
|
||||
|
||||
if (-not $WhatIf) {
|
||||
Start-Sleep -Seconds 3
|
||||
Get-Process MxGateway.Server, MxGateway.Worker, OtOpcUa.Server, OtOpcUa.Driver.Historian.Wonderware -ErrorAction SilentlyContinue |
|
||||
Get-Process MxGateway.Server, MxGateway.Worker, OtOpcUa.Host, OtOpcUa.Driver.Historian.Wonderware -ErrorAction SilentlyContinue |
|
||||
ForEach-Object {
|
||||
Write-Host " killing residual process $($_.ProcessName) (PID=$($_.Id))" -ForegroundColor DarkYellow
|
||||
Stop-Process -Id $_.Id -Force -ErrorAction SilentlyContinue
|
||||
@@ -109,14 +109,14 @@ Run {
|
||||
# Step 4: Refresh OtOpcUa + Wonderware historian sidecar
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Publishing OtOpcUa server + Wonderware historian sidecar from $RepoRoot"
|
||||
Step "Publishing OtOpcUa.Host + Wonderware historian sidecar from $RepoRoot"
|
||||
|
||||
Run {
|
||||
& dotnet publish "$RepoRoot\src\Server\ZB.MOM.WW.OtOpcUa.Server" `
|
||||
& dotnet publish "$RepoRoot\src\Server\ZB.MOM.WW.OtOpcUa.Host" `
|
||||
-c Release -o (Join-Path $PublishRoot "lmxopcua") | Out-Null
|
||||
& dotnet publish "$RepoRoot\src\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware" `
|
||||
-c Release -o (Join-Path $PublishRoot "lmxopcua\WonderwareHistorian") | Out-Null
|
||||
} "dotnet publish (Server + sidecar)"
|
||||
} "dotnet publish (Host + sidecar)"
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Step 5: Service env block — ensure OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED
|
||||
@@ -143,16 +143,16 @@ if (Test-NssmService 'OtOpcUaWonderwareHistorian') {
|
||||
# Step 6: Start in forward dependency order
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
Step "Starting services (MxAccessGw → OtOpcUaWonderwareHistorian → OtOpcUa)"
|
||||
Step "Starting services (MxAccessGw > OtOpcUaWonderwareHistorian > OtOpcUaHost)"
|
||||
|
||||
foreach ($pair in @(
|
||||
@{ Name = 'MxAccessGw'; Wait = 4 },
|
||||
@{ Name = 'OtOpcUaWonderwareHistorian'; Wait = 4 },
|
||||
@{ Name = 'OtOpcUa'; Wait = 8 }
|
||||
@{ Name = 'OtOpcUaHost'; Wait = 8 }
|
||||
)) {
|
||||
$name = $pair.Name
|
||||
if (Test-NssmService $name) {
|
||||
Run { nssm start $name } "start $name"
|
||||
Run { Start-Service $name } "start $name"
|
||||
if (-not $WhatIf) { Start-Sleep -Seconds $pair.Wait }
|
||||
}
|
||||
else {
|
||||
@@ -167,7 +167,7 @@ foreach ($pair in @(
|
||||
Step "Smoke verification"
|
||||
|
||||
if (-not $WhatIf) {
|
||||
foreach ($name in @('MxAccessGw', 'OtOpcUaWonderwareHistorian', 'OtOpcUa')) {
|
||||
foreach ($name in @('MxAccessGw', 'OtOpcUaWonderwareHistorian', 'OtOpcUaHost')) {
|
||||
if (Test-NssmService $name) {
|
||||
$status = (Get-Service $name).Status
|
||||
$color = if ($status -eq 'Running') { 'Green' } else { 'Red' }
|
||||
|
||||
@@ -3,16 +3,17 @@
|
||||
Stops + removes the v2 services. Mirrors Install-Services.ps1.
|
||||
|
||||
.DESCRIPTION
|
||||
PR 7.2 retired the legacy OtOpcUaGalaxyHost service. Galaxy access now flows
|
||||
through the in-process GalaxyDriver against a separately-installed mxaccessgw.
|
||||
OtOpcUaGalaxyHost is included in the cleanup loop below so this script safely
|
||||
removes it from any rig still carrying the legacy service from a pre-7.2
|
||||
install.
|
||||
Removes the v2 OtOpcUaHost service plus the optional OtOpcUaWonderwareHistorian
|
||||
sidecar. Also cleans up legacy service names from prior installs:
|
||||
- OtOpcUa (v1 server) — replaced by OtOpcUaHost in v2
|
||||
- OtOpcUaAdmin (v1 admin) — fused into OtOpcUaHost in v2
|
||||
- OtOpcUaGalaxyHost (pre-7.2 Galaxy host) — long-retired
|
||||
#>
|
||||
[CmdletBinding()] param()
|
||||
$ErrorActionPreference = 'Continue'
|
||||
|
||||
foreach ($svc in 'OtOpcUa', 'OtOpcUaWonderwareHistorian', 'OtOpcUaGalaxyHost') {
|
||||
foreach ($svc in 'OtOpcUaHost', 'OtOpcUaWonderwareHistorian',
|
||||
'OtOpcUa', 'OtOpcUaAdmin', 'OtOpcUaGalaxyHost') {
|
||||
if (Get-Service $svc -ErrorAction SilentlyContinue) {
|
||||
Write-Host "Stopping $svc..."
|
||||
Stop-Service $svc -Force -ErrorAction SilentlyContinue
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
# Dynamic (file-provider) Traefik config for the OtOpcUa admin HTTP routing.
|
||||
# Picked up by traefik.yml's file provider (with watch: true) so router/service
|
||||
# edits hot-reload without a Traefik restart.
|
||||
|
||||
http:
|
||||
routers:
|
||||
otopcua-admin:
|
||||
entryPoints: ["web"]
|
||||
rule: "HostRegexp(`otopcua.*`)"
|
||||
service: otopcua-admin
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
# Default expected status is 2xx. Followers return 503 from
|
||||
# /health/active so Traefik will drop them from the balancer
|
||||
# within the next interval after a leadership change.
|
||||
@@ -0,0 +1,30 @@
|
||||
# Traefik static configuration for the OtOpcUa fleet HTTP front door.
|
||||
#
|
||||
# Routes admin-role HTTP traffic (Blazor + auth + SignalR + /auth/*) to whichever
|
||||
# OtOpcUa.Host node currently holds the admin role-leader. Uses the /health/active
|
||||
# endpoint as the active-leader signal: a node returns 200 only when it is the
|
||||
# Akka admin role-leader; followers return 503 and Traefik routes around them.
|
||||
#
|
||||
# OPC UA traffic is NOT routed through Traefik — clients connect directly to
|
||||
# opc.tcp://node:4840 on every driver node and use the standard ServiceLevel
|
||||
# heuristic for failover.
|
||||
|
||||
entryPoints:
|
||||
web:
|
||||
address: ":80"
|
||||
|
||||
providers:
|
||||
file:
|
||||
filename: /etc/traefik/dynamic.yml
|
||||
watch: true
|
||||
|
||||
api:
|
||||
insecure: true
|
||||
dashboard: true
|
||||
|
||||
log:
|
||||
level: INFO
|
||||
format: common
|
||||
|
||||
accessLog:
|
||||
format: common
|
||||
@@ -0,0 +1,59 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Idempotent migration runner that takes the OtOpcUaConfig database from the v1 schema
|
||||
(with ConfigGeneration / ClusterNodeGenerationState) to the v2 hosting-aligned schema
|
||||
(with Deployment / NodeDeploymentState / ConfigEdit / DataProtectionKeys).
|
||||
|
||||
.DESCRIPTION
|
||||
Backs the database up, applies the idempotent EF migration script, then validates that
|
||||
expected tables exist and legacy tables are gone. Safe to re-run — the EF script itself
|
||||
is idempotent, and the backup picks a unique filename per invocation.
|
||||
|
||||
.PARAMETER ConnectionString
|
||||
Mandatory. Full ADO.NET connection string with permissions to BACKUP DATABASE and
|
||||
apply DDL on the target ConfigDb.
|
||||
|
||||
.PARAMETER BackupPath
|
||||
Optional. Full path for the backup file. Defaults to a timestamped path under $env:TEMP.
|
||||
|
||||
.EXAMPLE
|
||||
.\Migrate-To-V2.ps1 -ConnectionString "Server=sql01;Database=OtOpcUaConfig;Trusted_Connection=True;TrustServerCertificate=True"
|
||||
#>
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[Parameter(Mandatory)][string] $ConnectionString,
|
||||
[string] $BackupPath = "$env:TEMP\OtOpcUa-V1-Backup-$(Get-Date -Format yyyyMMddHHmmss).bak"
|
||||
)
|
||||
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
if (-not (Get-Command Invoke-Sqlcmd -ErrorAction SilentlyContinue)) {
|
||||
throw "Invoke-Sqlcmd not available. Install module: Install-Module SqlServer -Scope CurrentUser"
|
||||
}
|
||||
|
||||
Write-Host "Step 1/4 — Backup ConfigDb to $BackupPath" -ForegroundColor Cyan
|
||||
Invoke-Sqlcmd -ConnectionString $ConnectionString `
|
||||
-Query "BACKUP DATABASE [OtOpcUaConfig] TO DISK = '$BackupPath' WITH FORMAT, COMPRESSION"
|
||||
|
||||
Write-Host "Step 2/4 — Row counts (before)" -ForegroundColor Cyan
|
||||
$beforeCounts = Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\count-rows.sql"
|
||||
$beforeCounts | Format-Table
|
||||
|
||||
Write-Host "Step 3/4 — Apply Migrate-To-V2.sql" -ForegroundColor Cyan
|
||||
Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\Migrate-To-V2.sql" -QueryTimeout 1800
|
||||
|
||||
Write-Host "Step 4/4 — Row counts (after) + validation" -ForegroundColor Cyan
|
||||
$afterCounts = Invoke-Sqlcmd -ConnectionString $ConnectionString -InputFile "$PSScriptRoot\count-rows.sql"
|
||||
$afterCounts | Format-Table
|
||||
|
||||
$tablesNow = (Invoke-Sqlcmd -ConnectionString $ConnectionString `
|
||||
-Query "SELECT name FROM sys.tables ORDER BY name").name
|
||||
|
||||
foreach ($t in 'Deployment','NodeDeploymentState','ConfigEdit','DataProtectionKeys') {
|
||||
if ($tablesNow -notcontains $t) { throw "Expected v2 table $t missing." }
|
||||
}
|
||||
foreach ($t in 'ConfigGeneration','ClusterNodeGenerationState') {
|
||||
if ($tablesNow -contains $t) { throw "Legacy v1 table $t still present." }
|
||||
}
|
||||
|
||||
Write-Host "Migration complete. Backup at $BackupPath" -ForegroundColor Green
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
-- Per-table row counts for pre/post-migration audit.
|
||||
-- Covers every table relevant to the v1 -> v2 transition so the operator can confirm
|
||||
-- live-edit data was preserved and v2 tables came up empty.
|
||||
|
||||
SELECT TableName = t.name, [Rows] = SUM(p.[rows])
|
||||
FROM sys.tables t
|
||||
JOIN sys.partitions p ON p.object_id = t.object_id AND p.index_id IN (0,1)
|
||||
WHERE t.name IN (
|
||||
-- Live-edit configuration (rows must survive)
|
||||
'ServerCluster','ClusterNode','ClusterNodeCredential',
|
||||
'Namespace','UnsArea','UnsLine',
|
||||
'DriverInstance','Device','Equipment','Tag','PollGroup','VirtualTag',
|
||||
'NodeAcl','ExternalIdReservation',
|
||||
'Script','ScriptedAlarm','ScriptedAlarmState',
|
||||
'LdapGroupRoleMapping',
|
||||
'EquipmentImportBatch','EquipmentImportRow',
|
||||
-- Status tables (rebuilt at runtime; counts informational)
|
||||
'DriverHostStatus','DriverInstanceResilienceStatus',
|
||||
-- Audit (preserved)
|
||||
'ConfigAuditLog',
|
||||
-- v2 deploy model (empty pre-migration, populated post)
|
||||
'Deployment','NodeDeploymentState','ConfigEdit','DataProtectionKeys'
|
||||
)
|
||||
GROUP BY t.name
|
||||
ORDER BY t.name;
|
||||
GO
|
||||
@@ -9,9 +9,9 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CliFx" Version="2.3.6"/>
|
||||
<PackageReference Include="Serilog" Version="4.2.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0"/>
|
||||
<PackageReference Include="CliFx"/>
|
||||
<PackageReference Include="Serilog"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Client" Version="1.5.378.106"/>
|
||||
<PackageReference Include="Serilog" Version="4.2.0"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Client"/>
|
||||
<PackageReference Include="Serilog"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -9,17 +9,17 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Avalonia" Version="11.2.7"/>
|
||||
<PackageReference Include="Avalonia.Desktop" Version="11.2.7"/>
|
||||
<PackageReference Include="Avalonia.Svg.Skia" Version="11.2.0.2"/>
|
||||
<PackageReference Include="Avalonia.Themes.Fluent" Version="11.2.7"/>
|
||||
<PackageReference Include="Avalonia.Fonts.Inter" Version="11.2.7"/>
|
||||
<PackageReference Include="Avalonia.Controls.DataGrid" Version="11.2.7"/>
|
||||
<PackageReference Include="Avalonia.Diagnostics" Version="11.2.7"/>
|
||||
<PackageReference Include="CommunityToolkit.Mvvm" Version="8.4.0"/>
|
||||
<PackageReference Include="Serilog" Version="4.2.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0"/>
|
||||
<PackageReference Include="Avalonia"/>
|
||||
<PackageReference Include="Avalonia.Desktop"/>
|
||||
<PackageReference Include="Avalonia.Svg.Skia"/>
|
||||
<PackageReference Include="Avalonia.Themes.Fluent"/>
|
||||
<PackageReference Include="Avalonia.Fonts.Inter"/>
|
||||
<PackageReference Include="Avalonia.Controls.DataGrid"/>
|
||||
<PackageReference Include="Avalonia.Diagnostics"/>
|
||||
<PackageReference Include="CommunityToolkit.Mvvm"/>
|
||||
<PackageReference Include="Serilog"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console"/>
|
||||
<PackageReference Include="Serilog.Sinks.File"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
@@ -35,4 +35,11 @@
|
||||
<EmbeddedResource Include="Assets\app-icon.svg" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Tmds.DBus.Protocol 0.20.0 reaches this project transitively from Avalonia.Desktop on
|
||||
Linux/macOS only. We do not ship Linux/macOS builds of the Client.UI to end users;
|
||||
this advisory affects dev-tooling code paths only. -->
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-xrw6-gwf8-vvr9"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Cluster;
|
||||
|
||||
public sealed class AkkaClusterOptions
|
||||
{
|
||||
public const string SectionName = "Cluster";
|
||||
|
||||
public string SystemName { get; set; } = "otopcua";
|
||||
public string Hostname { get; set; } = "0.0.0.0";
|
||||
public int Port { get; set; } = 4053;
|
||||
|
||||
/// <summary>
|
||||
/// Hostname advertised in cluster gossip. Must be reachable by other nodes.
|
||||
/// In docker-compose this is the container DNS name; in bare metal it's the
|
||||
/// host's stable LAN address.
|
||||
/// </summary>
|
||||
public string PublicHostname { get; set; } = "127.0.0.1";
|
||||
|
||||
public string[] SeedNodes { get; set; } = Array.Empty<string>();
|
||||
|
||||
/// <summary>
|
||||
/// Cluster roles for this node. When empty the role list comes from
|
||||
/// <c>OTOPCUA_ROLES</c> via <see cref="RoleParser"/>. Allowed values:
|
||||
/// <c>admin</c>, <c>driver</c>, <c>dev</c>.
|
||||
/// </summary>
|
||||
public string[] Roles { get; set; } = Array.Empty<string>();
|
||||
}
|
||||
@@ -0,0 +1,188 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
using CommonsNodeId = ZB.MOM.WW.OtOpcUa.Commons.Types.NodeId;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Cluster;
|
||||
|
||||
/// <summary>
|
||||
/// Thread-safe live view of cluster membership and role topology. Subscribes to
|
||||
/// <see cref="ClusterEvent.IMemberEvent"/>, <see cref="ClusterEvent.RoleLeaderChanged"/>, and
|
||||
/// <see cref="ClusterEvent.LeaderChanged"/> through an internal subscriber actor and keeps
|
||||
/// a snapshot of role-to-members + role-to-leader. The CLR-facing event surface is
|
||||
/// <see cref="IClusterRoleInfo.RoleLeaderChanged"/>.
|
||||
/// </summary>
|
||||
public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
{
|
||||
private readonly Akka.Cluster.Cluster _cluster;
|
||||
private readonly ILogger<ClusterRoleInfo> _logger;
|
||||
private readonly CommonsNodeId _localNode;
|
||||
private readonly HashSet<string> _localRoles;
|
||||
private readonly object _lock = new();
|
||||
private readonly Dictionary<string, Member?> _roleLeaders = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, HashSet<Member>> _membersByRole = new(StringComparer.Ordinal);
|
||||
private IActorRef? _subscriber;
|
||||
|
||||
public ClusterRoleInfo(ActorSystem system, IOptions<AkkaClusterOptions> options, ILogger<ClusterRoleInfo> logger)
|
||||
{
|
||||
_cluster = Akka.Cluster.Cluster.Get(system);
|
||||
_logger = logger;
|
||||
// NodeId encodes host:port so cluster members on shared hosts (test loopback, dev VMs
|
||||
// sharing a bind IP) stay distinct. Production hosts have unique DNS names so the port
|
||||
// suffix is harmless redundancy.
|
||||
_localNode = CommonsNodeId.Parse($"{options.Value.PublicHostname}:{options.Value.Port}");
|
||||
_localRoles = new HashSet<string>(options.Value.Roles, StringComparer.Ordinal);
|
||||
|
||||
SeedFromCurrentState();
|
||||
_subscriber = system.ActorOf(Props.Create(() => new SubscriberActor(this)), "clusterroleinfo-subscriber");
|
||||
}
|
||||
|
||||
public CommonsNodeId LocalNode => _localNode;
|
||||
|
||||
public IReadOnlySet<string> LocalRoles => _localRoles;
|
||||
|
||||
public bool HasRole(string role) => _localRoles.Contains(role);
|
||||
|
||||
public IReadOnlyList<CommonsNodeId> MembersWithRole(string role)
|
||||
{
|
||||
lock (_lock)
|
||||
{
|
||||
if (!_membersByRole.TryGetValue(role, out var members)) return Array.Empty<CommonsNodeId>();
|
||||
return members
|
||||
.Select(m => ToNodeId(m.Address))
|
||||
.ToArray();
|
||||
}
|
||||
}
|
||||
|
||||
public CommonsNodeId? RoleLeader(string role)
|
||||
{
|
||||
lock (_lock)
|
||||
{
|
||||
return _roleLeaders.TryGetValue(role, out var leader) && leader is not null
|
||||
? ToNodeId(leader.Address)
|
||||
: (CommonsNodeId?)null;
|
||||
}
|
||||
}
|
||||
|
||||
public event EventHandler<RoleLeaderChangedEventArgs>? RoleLeaderChanged;
|
||||
|
||||
private void SeedFromCurrentState()
|
||||
{
|
||||
var snapshot = _cluster.State;
|
||||
lock (_lock)
|
||||
{
|
||||
foreach (var member in snapshot.Members)
|
||||
foreach (var role in member.Roles)
|
||||
{
|
||||
if (!_membersByRole.TryGetValue(role, out var set))
|
||||
_membersByRole[role] = set = new HashSet<Member>();
|
||||
set.Add(member);
|
||||
}
|
||||
|
||||
foreach (var role in snapshot.Members.SelectMany(m => m.Roles).Distinct())
|
||||
{
|
||||
var leaderAddr = _cluster.State.RoleLeader(role);
|
||||
_roleLeaders[role] = leaderAddr is not null
|
||||
? snapshot.Members.FirstOrDefault(m => m.Address == leaderAddr)
|
||||
: null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal void HandleMemberEvent(ClusterEvent.IMemberEvent evt)
|
||||
{
|
||||
lock (_lock)
|
||||
{
|
||||
switch (evt)
|
||||
{
|
||||
case ClusterEvent.MemberUp up:
|
||||
foreach (var role in up.Member.Roles)
|
||||
{
|
||||
if (!_membersByRole.TryGetValue(role, out var set))
|
||||
_membersByRole[role] = set = new HashSet<Member>();
|
||||
set.Add(up.Member);
|
||||
}
|
||||
break;
|
||||
case ClusterEvent.MemberRemoved removed:
|
||||
foreach (var role in removed.Member.Roles)
|
||||
if (_membersByRole.TryGetValue(role, out var set))
|
||||
set.Remove(removed.Member);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal void HandleRoleLeaderChanged(ClusterEvent.RoleLeaderChanged evt)
|
||||
{
|
||||
CommonsNodeId? previous = null;
|
||||
CommonsNodeId? next = null;
|
||||
var raise = false;
|
||||
|
||||
lock (_lock)
|
||||
{
|
||||
_roleLeaders.TryGetValue(evt.Role, out var prevMember);
|
||||
if (prevMember is not null)
|
||||
previous = ToNodeId(prevMember.Address);
|
||||
|
||||
var nextMember = evt.Leader is null
|
||||
? null
|
||||
: _cluster.State.Members.FirstOrDefault(m => m.Address == evt.Leader);
|
||||
|
||||
_roleLeaders[evt.Role] = nextMember;
|
||||
if (nextMember is not null)
|
||||
next = ToNodeId(nextMember.Address);
|
||||
|
||||
raise = !Nullable.Equals(previous, next);
|
||||
}
|
||||
|
||||
if (!raise) return;
|
||||
try
|
||||
{
|
||||
RoleLeaderChanged?.Invoke(this, new RoleLeaderChangedEventArgs
|
||||
{
|
||||
Role = evt.Role,
|
||||
PreviousLeader = previous,
|
||||
NewLeader = next,
|
||||
});
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "RoleLeaderChanged subscriber threw for role {Role}", evt.Role);
|
||||
}
|
||||
}
|
||||
|
||||
private static CommonsNodeId ToNodeId(Akka.Actor.Address address) =>
|
||||
CommonsNodeId.Parse($"{address.Host ?? string.Empty}:{address.Port ?? 0}");
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_subscriber?.Tell(PoisonPill.Instance);
|
||||
_subscriber = null;
|
||||
}
|
||||
|
||||
private sealed class SubscriberActor : ReceiveActor
|
||||
{
|
||||
public SubscriberActor(ClusterRoleInfo owner)
|
||||
{
|
||||
Receive<ClusterEvent.IMemberEvent>(e => owner.HandleMemberEvent(e));
|
||||
Receive<ClusterEvent.RoleLeaderChanged>(e => owner.HandleRoleLeaderChanged(e));
|
||||
Receive<ClusterEvent.LeaderChanged>(_ => { /* no-op for now; reserved for ServiceLevel calc */ });
|
||||
Receive<ClusterEvent.CurrentClusterState>(_ => { /* seeded from initial snapshot */ });
|
||||
}
|
||||
|
||||
protected override void PreStart()
|
||||
{
|
||||
Akka.Cluster.Cluster.Get(Context.System).Subscribe(
|
||||
Self,
|
||||
ClusterEvent.InitialStateAsEvents,
|
||||
typeof(ClusterEvent.IMemberEvent),
|
||||
typeof(ClusterEvent.LeaderChanged),
|
||||
typeof(ClusterEvent.RoleLeaderChanged));
|
||||
}
|
||||
|
||||
protected override void PostStop() =>
|
||||
Akka.Cluster.Cluster.Get(Context.System).Unsubscribe(Self);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Cluster;
|
||||
|
||||
public static class HoconLoader
|
||||
{
|
||||
private const string ResourceName = "ZB.MOM.WW.OtOpcUa.Cluster.Resources.akka.conf";
|
||||
|
||||
public static string LoadBaseConfig()
|
||||
{
|
||||
using var stream = typeof(HoconLoader).Assembly.GetManifestResourceStream(ResourceName)
|
||||
?? throw new InvalidOperationException(
|
||||
$"Embedded resource '{ResourceName}' not found. Verify EmbeddedResource glob in csproj.");
|
||||
using var reader = new StreamReader(stream);
|
||||
return reader.ReadToEnd();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
# Base Akka.NET cluster configuration for OtOpcUa fused-host nodes.
|
||||
#
|
||||
# Roles, seed nodes, public hostname/port, and the actor system name are overlaid
|
||||
# at runtime by AkkaHostedService — see ZB.MOM.WW.OtOpcUa.Cluster/AkkaHostedService.cs.
|
||||
# Everything else here is the cluster-wide tuning that should match across nodes.
|
||||
#
|
||||
# Tuning sourced from ScadaLink (ScadaLink.Host/Actors/AkkaHostedService.BuildHocon);
|
||||
# any divergence must be deliberate and recorded in docs/v2/Architecture.md.
|
||||
|
||||
akka {
|
||||
extensions = [
|
||||
"Akka.Cluster.Tools.PublishSubscribe.DistributedPubSubExtensionProvider, Akka.Cluster.Tools"
|
||||
]
|
||||
|
||||
actor {
|
||||
provider = cluster
|
||||
}
|
||||
|
||||
remote {
|
||||
dot-netty.tcp {
|
||||
hostname = "0.0.0.0"
|
||||
port = 4053
|
||||
}
|
||||
transport-failure-detector {
|
||||
heartbeat-interval = 2s
|
||||
acceptable-heartbeat-pause = 10s
|
||||
}
|
||||
}
|
||||
|
||||
cluster {
|
||||
seed-nodes = []
|
||||
roles = []
|
||||
min-nr-of-members = 1
|
||||
|
||||
split-brain-resolver {
|
||||
active-strategy = "keep-oldest"
|
||||
stable-after = 15s
|
||||
keep-oldest {
|
||||
down-if-alone = on
|
||||
}
|
||||
}
|
||||
|
||||
failure-detector {
|
||||
heartbeat-interval = 2s
|
||||
threshold = 10.0
|
||||
acceptable-heartbeat-pause = 10s
|
||||
}
|
||||
|
||||
down-removal-margin = 15s
|
||||
run-coordinated-shutdown-when-down = on
|
||||
|
||||
singleton {
|
||||
singleton-name = "singleton"
|
||||
}
|
||||
singleton-proxy {
|
||||
singleton-identification-interval = 1s
|
||||
}
|
||||
}
|
||||
|
||||
coordinated-shutdown {
|
||||
run-by-clr-shutdown-hook = on
|
||||
default-phase-timeout = 30s
|
||||
}
|
||||
}
|
||||
|
||||
# Pinned dispatcher used by OpcUaPublishActor (Task 44) so the OPC UA SDK sees
|
||||
# only one thread per actor instance — its session/subscription locks expect
|
||||
# strict single-threaded access.
|
||||
opcua-synchronized-dispatcher {
|
||||
type = "PinnedDispatcher"
|
||||
executor = "thread-pool-executor"
|
||||
throughput = 1
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Cluster;
|
||||
|
||||
public static class RoleParser
|
||||
{
|
||||
private static readonly HashSet<string> Allowed = new(StringComparer.Ordinal)
|
||||
{
|
||||
"admin", "driver", "dev",
|
||||
};
|
||||
|
||||
public static string[] Parse(string? raw)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(raw)) return Array.Empty<string>();
|
||||
|
||||
var roles = raw
|
||||
.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
|
||||
.Select(r => r.ToLowerInvariant())
|
||||
.Distinct()
|
||||
.ToArray();
|
||||
|
||||
foreach (var r in roles)
|
||||
{
|
||||
if (!Allowed.Contains(r))
|
||||
throw new ArgumentException(
|
||||
$"Unknown role '{r}'. Allowed: {string.Join(", ", Allowed)}.", nameof(raw));
|
||||
}
|
||||
|
||||
return roles;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
using Akka.Cluster.Hosting;
|
||||
using Akka.Hosting;
|
||||
using Akka.Remote.Hosting;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Cluster;
|
||||
|
||||
public static class ServiceCollectionExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Binds <see cref="AkkaClusterOptions"/> and registers <see cref="IClusterRoleInfo"/>. The
|
||||
/// actual ActorSystem + cluster bootstrap is layered on inside the host's <c>AddAkka(...)</c>
|
||||
/// configurator via <see cref="WithOtOpcUaClusterBootstrap"/> — keeping the entire Akka graph
|
||||
/// under Akka.Hosting's management so cluster singletons land on the same ActorSystem.
|
||||
/// </summary>
|
||||
public static IServiceCollection AddOtOpcUaCluster(this IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
services.AddOptions<AkkaClusterOptions>()
|
||||
.Bind(configuration.GetSection(AkkaClusterOptions.SectionName));
|
||||
|
||||
services.AddSingleton<IClusterRoleInfo, ClusterRoleInfo>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Configures the Akka.Hosting builder with the embedded OtOpcUa HOCON (split-brain resolver,
|
||||
/// pinned dispatcher, failure detector tuning) + remote endpoint + cluster bootstrap derived
|
||||
/// from <see cref="AkkaClusterOptions"/>.
|
||||
///
|
||||
/// Wire from Program.cs:
|
||||
/// <code>
|
||||
/// services.AddAkka("otopcua", (ab, sp) =>
|
||||
/// {
|
||||
/// ab.WithOtOpcUaClusterBootstrap(sp);
|
||||
/// if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
|
||||
/// if (hasDriver) ab.WithOtOpcUaRuntimeActors();
|
||||
/// });
|
||||
/// </code>
|
||||
/// </summary>
|
||||
public static AkkaConfigurationBuilder WithOtOpcUaClusterBootstrap(
|
||||
this AkkaConfigurationBuilder builder,
|
||||
IServiceProvider serviceProvider)
|
||||
{
|
||||
var options = serviceProvider.GetRequiredService<IOptions<AkkaClusterOptions>>().Value;
|
||||
|
||||
builder.AddHocon(HoconLoader.LoadBaseConfig(), HoconAddMode.Append);
|
||||
|
||||
builder.WithRemoting(new RemoteOptions
|
||||
{
|
||||
HostName = options.Hostname,
|
||||
Port = options.Port,
|
||||
PublicHostName = options.PublicHostname,
|
||||
});
|
||||
|
||||
builder.WithClustering(new ClusterOptions
|
||||
{
|
||||
SeedNodes = options.SeedNodes,
|
||||
Roles = options.Roles,
|
||||
});
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Cluster</RootNamespace>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Akka.Hosting"/>
|
||||
<PackageReference Include="Akka.Cluster"/>
|
||||
<PackageReference Include="Akka.Cluster.Hosting"/>
|
||||
<PackageReference Include="Akka.Cluster.Tools"/>
|
||||
<PackageReference Include="Akka.Remote.Hosting"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Commons\ZB.MOM.WW.OtOpcUa.Commons.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<EmbeddedResource Include="Resources\akka.conf"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- OpenTelemetry.Api 1.9.0 reaches this project transitively from Akka.Cluster.Hosting.
|
||||
Bump arrives when Akka updates its OTel dependency; tracked separately. -->
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-g94r-2vxg-569j"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,41 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Engines;
|
||||
|
||||
/// <summary>
|
||||
/// Persistence seam for <c>ScriptedAlarmActor</c>'s in-memory state across actor restarts.
|
||||
/// Captures only the slice the actor's 3-state machine needs (Inactive / Active /
|
||||
/// Acknowledged + last transition + last-ack user). The fuller GxP audit trail
|
||||
/// (<see cref="Configuration.Entities.ScriptedAlarmState"/>'s Comments/Confirmed/Shelving)
|
||||
/// stays in the production engine binding — this seam is the small surface the actor
|
||||
/// consumes directly.
|
||||
/// </summary>
|
||||
public interface IAlarmActorStateStore
|
||||
{
|
||||
Task<AlarmActorStateSnapshot?> LoadAsync(string alarmId, CancellationToken ct);
|
||||
Task SaveAsync(AlarmActorStateSnapshot snapshot, CancellationToken ct);
|
||||
}
|
||||
|
||||
/// <summary>Persisted slice of <c>ScriptedAlarmActor</c>'s state. Active is NOT persisted —
|
||||
/// it re-derives from the evaluator on startup per Phase 7 decision #14. <c>State</c> here
|
||||
/// distinguishes Acknowledged vs not-yet-acknowledged for cases where the actor came up
|
||||
/// Active and operator interaction had already happened.</summary>
|
||||
/// <param name="AlarmId">Matches <c>ScriptedAlarm.ScriptedAlarmId</c>.</param>
|
||||
/// <param name="State">Inactive / Active / Acknowledged — the actor's 3-state enum, projected to string.</param>
|
||||
/// <param name="LastTransitionUtc">When the actor last transitioned.</param>
|
||||
/// <param name="LastAckUser">Who acknowledged most recently. Null when never acked.</param>
|
||||
public sealed record AlarmActorStateSnapshot(
|
||||
string AlarmId,
|
||||
string State,
|
||||
DateTime LastTransitionUtc,
|
||||
string? LastAckUser);
|
||||
|
||||
/// <summary>No-op default. Bound when no production store is configured (tests, smoke runs).
|
||||
/// Load returns null → actor boots Inactive; Save is a no-op so state doesn't leak.</summary>
|
||||
public sealed class NullAlarmActorStateStore : IAlarmActorStateStore
|
||||
{
|
||||
public static readonly NullAlarmActorStateStore Instance = new();
|
||||
private NullAlarmActorStateStore() { }
|
||||
public Task<AlarmActorStateSnapshot?> LoadAsync(string alarmId, CancellationToken ct) =>
|
||||
Task.FromResult<AlarmActorStateSnapshot?>(null);
|
||||
public Task SaveAsync(AlarmActorStateSnapshot snapshot, CancellationToken ct) =>
|
||||
Task.CompletedTask;
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Engines;
|
||||
|
||||
/// <summary>
|
||||
/// Abstraction over the scripted-alarm predicate engine. Production binds this to a
|
||||
/// wrapper around <c>ScriptedAlarmEngine</c> from <c>Core.ScriptedAlarms</c>; default
|
||||
/// binding is <see cref="NullScriptedAlarmEvaluator"/> which keeps the alarm in its
|
||||
/// current state (so an unconfigured node never spuriously alarms).
|
||||
/// </summary>
|
||||
public interface IScriptedAlarmEvaluator
|
||||
{
|
||||
ScriptedAlarmEvalResult Evaluate(string alarmId, string predicate, IReadOnlyDictionary<string, object?> dependencies);
|
||||
}
|
||||
|
||||
/// <summary>Result of one alarm-predicate evaluation. <c>Active</c> is only meaningful when
|
||||
/// <c>Success</c> is true; on failure the caller should keep the prior state and log Reason.</summary>
|
||||
public sealed record ScriptedAlarmEvalResult(bool Success, bool Active, string? Reason)
|
||||
{
|
||||
public static ScriptedAlarmEvalResult Ok(bool active) => new(true, active, null);
|
||||
public static ScriptedAlarmEvalResult Failure(string reason) => new(false, false, reason);
|
||||
}
|
||||
|
||||
/// <summary>Default that always returns <c>Active = false, Success = true</c>. Safe no-op:
|
||||
/// no alarm fires when no real engine is bound.</summary>
|
||||
public sealed class NullScriptedAlarmEvaluator : IScriptedAlarmEvaluator
|
||||
{
|
||||
public static readonly NullScriptedAlarmEvaluator Instance = new();
|
||||
private NullScriptedAlarmEvaluator() { }
|
||||
public ScriptedAlarmEvalResult Evaluate(string alarmId, string predicate, IReadOnlyDictionary<string, object?> dependencies)
|
||||
=> ScriptedAlarmEvalResult.Ok(active: false);
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Engines;
|
||||
|
||||
/// <summary>
|
||||
/// Abstraction over the compiled virtual-tag expression engine. Runtime consumes this so
|
||||
/// <see cref="VirtualTagActor"/> can stay free of Roslyn / scripting machinery and the
|
||||
/// production wiring binds an adapter over <c>VirtualTagEngine</c> from
|
||||
/// <c>Core.VirtualTags</c>.
|
||||
/// </summary>
|
||||
public interface IVirtualTagEvaluator
|
||||
{
|
||||
/// <summary>
|
||||
/// Evaluate <paramref name="expression"/> against the snapshot in
|
||||
/// <paramref name="dependencies"/>. Implementations must not throw — script failures
|
||||
/// are reported via <see cref="VirtualTagEvalResult.Failure"/>.
|
||||
/// </summary>
|
||||
VirtualTagEvalResult Evaluate(string virtualTagId, string expression, IReadOnlyDictionary<string, object?> dependencies);
|
||||
}
|
||||
|
||||
/// <summary>Result of one virtual-tag expression eval. Stash a Reason on every Failure so
|
||||
/// callers can emit a useful <c>ScriptLogEntry</c> to operators.</summary>
|
||||
public sealed record VirtualTagEvalResult(bool Success, object? Value, string? Reason)
|
||||
{
|
||||
public static readonly VirtualTagEvalResult NoChange = new(true, null, "no-change");
|
||||
public static VirtualTagEvalResult Ok(object? value) => new(true, value, null);
|
||||
public static VirtualTagEvalResult Failure(string reason) => new(false, null, reason);
|
||||
}
|
||||
|
||||
/// <summary>Returns <see cref="VirtualTagEvalResult.NoChange"/> from every call. Bound by default
|
||||
/// when the production <c>VirtualTagEngine</c> adapter hasn't been registered (Mac dev, tests).</summary>
|
||||
public sealed class NullVirtualTagEvaluator : IVirtualTagEvaluator
|
||||
{
|
||||
public static readonly NullVirtualTagEvaluator Instance = new();
|
||||
private NullVirtualTagEvaluator() { }
|
||||
public VirtualTagEvalResult Evaluate(string virtualTagId, string expression, IReadOnlyDictionary<string, object?> dependencies)
|
||||
=> VirtualTagEvalResult.NoChange;
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
/// <summary>
|
||||
/// Cluster-singleton-proxy client for the <c>AdminOperationsActor</c>. The Blazor UI calls
|
||||
/// this from any host (admin or driver role); the proxy routes the request to whichever node
|
||||
/// holds the admin singleton.
|
||||
/// </summary>
|
||||
public interface IAdminOperationsClient
|
||||
{
|
||||
Task<StartDeploymentResult> StartDeploymentAsync(string createdBy, CancellationToken ct);
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
/// <summary>
|
||||
/// Live view of the local node's identity and the cluster's role topology. Implemented by
|
||||
/// <c>ClusterRoleInfo</c> in <c>OtOpcUa.Cluster</c>; consumed by everything that needs to
|
||||
/// distinguish admin-role vs driver-role members or react to role-leader changes (e.g. OPC UA
|
||||
/// ServiceLevel computation).
|
||||
/// </summary>
|
||||
public interface IClusterRoleInfo
|
||||
{
|
||||
NodeId LocalNode { get; }
|
||||
IReadOnlySet<string> LocalRoles { get; }
|
||||
bool HasRole(string role);
|
||||
IReadOnlyList<NodeId> MembersWithRole(string role);
|
||||
NodeId? RoleLeader(string role);
|
||||
|
||||
event EventHandler<RoleLeaderChangedEventArgs>? RoleLeaderChanged;
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
/// <summary>
|
||||
/// Per-node diagnostics fetched on demand. Implemented in Phase 8 (AdminUI/Runtime wiring)
|
||||
/// over an Akka request/response — the diagnostics actor lives on the target driver node.
|
||||
/// </summary>
|
||||
public interface IFleetDiagnosticsClient
|
||||
{
|
||||
Task<NodeDiagnosticsSnapshot> GetDiagnosticsAsync(NodeId nodeId, CancellationToken ct);
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
public sealed record DriverInstanceDiagnostics(
|
||||
Guid DriverInstanceId,
|
||||
string Name,
|
||||
string State,
|
||||
int ConnectedDevices,
|
||||
int FaultedDevices,
|
||||
DateTime LastChangeUtc);
|
||||
|
||||
/// <summary>
|
||||
/// Per-node diagnostics returned by <c>IFleetDiagnosticsClient</c>. Populated by the node's
|
||||
/// local <c>DriverHostActor</c> via a request/response over Akka.
|
||||
/// </summary>
|
||||
public sealed record NodeDiagnosticsSnapshot(
|
||||
NodeId NodeId,
|
||||
RevisionHash? CurrentRevision,
|
||||
IReadOnlyList<DriverInstanceDiagnostics> Drivers,
|
||||
DateTime AsOfUtc);
|
||||
@@ -0,0 +1,10 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
public sealed class RoleLeaderChangedEventArgs : EventArgs
|
||||
{
|
||||
public required string Role { get; init; }
|
||||
public required NodeId? PreviousLeader { get; init; }
|
||||
public required NodeId? NewLeader { get; init; }
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
|
||||
/// <summary>
|
||||
/// Request from the admin UI to the <c>AdminOperationsActor</c> singleton asking it to snapshot
|
||||
/// the current live-edit state and start a deployment.
|
||||
/// </summary>
|
||||
public sealed record StartDeployment(
|
||||
string CreatedBy,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,23 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
|
||||
public enum StartDeploymentOutcome
|
||||
{
|
||||
Accepted,
|
||||
NoChanges,
|
||||
AnotherDeploymentInFlight,
|
||||
Rejected,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reply from the <c>AdminOperationsActor</c> singleton. <c>Accepted</c> means the snapshot
|
||||
/// was sealed and a <c>Deployment</c> row was created; the in-flight deployment can be
|
||||
/// tracked through fleet-status broadcasts.
|
||||
/// </summary>
|
||||
public sealed record StartDeploymentResult(
|
||||
StartDeploymentOutcome Outcome,
|
||||
DeploymentId? DeploymentId,
|
||||
RevisionHash? RevisionHash,
|
||||
string? Message,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,25 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Alerts;
|
||||
|
||||
/// <summary>
|
||||
/// Live alarm transition published on the cluster <c>alerts</c> DistributedPubSub topic.
|
||||
/// Emitted by ScriptedAlarmActor (and future native-alarm bridges) when an alarm condition
|
||||
/// transitions; consumed by <c>AlertSignalRBridge</c> for browser fan-out and by historian
|
||||
/// adapters for durable storage.
|
||||
/// </summary>
|
||||
/// <param name="AlarmId">Stable condition identity (matches <c>ScriptedAlarm.ScriptedAlarmId</c> for scripted alarms).</param>
|
||||
/// <param name="EquipmentPath">UNS path of the Equipment node the alarm hangs under. Doubles as the SourceNode.</param>
|
||||
/// <param name="AlarmName">Operator-visible alarm name.</param>
|
||||
/// <param name="TransitionKind">Activated / Cleared / Acknowledged / Confirmed / Shelved / Unshelved / Disabled / Enabled / CommentAdded.</param>
|
||||
/// <param name="Severity">1–1000 numeric severity (OPC UA convention).</param>
|
||||
/// <param name="Message">Fully-rendered message text — template tokens already resolved.</param>
|
||||
/// <param name="User">Operator who triggered the transition. "system" for engine-driven events.</param>
|
||||
/// <param name="TimestampUtc">When the transition occurred.</param>
|
||||
public sealed record AlarmTransitionEvent(
|
||||
string AlarmId,
|
||||
string EquipmentPath,
|
||||
string AlarmName,
|
||||
string TransitionKind,
|
||||
int Severity,
|
||||
string Message,
|
||||
string User,
|
||||
DateTime TimestampUtc);
|
||||
@@ -0,0 +1,17 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Cluster-broadcast audit event consumed by the <c>AuditWriterActor</c> singleton, which
|
||||
/// batches and idempotently inserts into <c>ConfigAuditLog</c>.
|
||||
/// </summary>
|
||||
public sealed record AuditEvent(
|
||||
Guid EventId,
|
||||
string Category,
|
||||
string Action,
|
||||
string Actor,
|
||||
DateTime OccurredAtUtc,
|
||||
string? DetailsJson,
|
||||
NodeId SourceNode,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,15 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
|
||||
|
||||
public enum ApplyAckOutcome { Applied, Failed }
|
||||
|
||||
/// <summary>
|
||||
/// Per-node acknowledgment returned by <c>DriverHostActor</c> to the dispatching coordinator.
|
||||
/// </summary>
|
||||
public sealed record ApplyAck(
|
||||
DeploymentId DeploymentId,
|
||||
NodeId NodeId,
|
||||
ApplyAckOutcome Outcome,
|
||||
string? FailureReason,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,14 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
|
||||
|
||||
/// <summary>
|
||||
/// Coordinator-published event indicating that the deployment failed and was rolled back.
|
||||
/// Includes the set of nodes that NACKed or timed out so the admin UI can surface which
|
||||
/// node(s) are sticky on the prior good revision.
|
||||
/// </summary>
|
||||
public sealed record DeploymentFailed(
|
||||
DeploymentId DeploymentId,
|
||||
string FailureReason,
|
||||
IReadOnlyList<NodeId> FailedNodes,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,13 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
|
||||
|
||||
/// <summary>
|
||||
/// Coordinator-published event indicating that every active driver node successfully applied
|
||||
/// the deployment and the row in <c>Deployment</c> has been transitioned to <c>Sealed</c>.
|
||||
/// </summary>
|
||||
public sealed record DeploymentSealed(
|
||||
DeploymentId DeploymentId,
|
||||
RevisionHash RevisionHash,
|
||||
DateTime SealedAtUtc,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,13 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
|
||||
|
||||
/// <summary>
|
||||
/// Sent from the admin-role <c>ConfigPublishCoordinator</c> singleton to each driver node's
|
||||
/// <c>DriverHostActor</c>. Tells the node to fetch the deployment artifact identified by
|
||||
/// <paramref name="DeploymentId"/> + <paramref name="RevisionHash"/> and apply it.
|
||||
/// </summary>
|
||||
public sealed record DispatchDeployment(
|
||||
DeploymentId DeploymentId,
|
||||
RevisionHash RevisionHash,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,21 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Fleet;
|
||||
|
||||
public enum FleetNodeHealth { Healthy, Degraded, Unreachable }
|
||||
|
||||
public sealed record FleetNodeStatus(
|
||||
NodeId NodeId,
|
||||
FleetNodeHealth Health,
|
||||
RevisionHash? CurrentRevision,
|
||||
DateTime LastSeenUtc);
|
||||
|
||||
/// <summary>
|
||||
/// Periodic fleet-wide status broadcast pushed by <c>FleetStatusBroadcaster</c> to admin UI
|
||||
/// subscribers via SignalR.
|
||||
/// </summary>
|
||||
public sealed record FleetStatusChanged(
|
||||
IReadOnlyList<FleetNodeStatus> Nodes,
|
||||
DeploymentId? CurrentDeployment,
|
||||
DateTime AsOfUtc,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,10 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Fleet;
|
||||
|
||||
/// <summary>
|
||||
/// Request a diagnostic snapshot from a per-node <c>DriverHostActor</c>. Sent by the admin UI's
|
||||
/// <c>IFleetDiagnosticsClient</c> via <c>ActorSelection</c> over the cluster; the local
|
||||
/// DriverHostActor responds with a <c>NodeDiagnosticsSnapshot</c>.
|
||||
/// </summary>
|
||||
public sealed record GetDiagnostics(CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,23 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Logging;
|
||||
|
||||
/// <summary>
|
||||
/// One line of script log output published on the cluster <c>script-logs</c> DPS topic.
|
||||
/// Emitted by VirtualTagActor + ScriptedAlarmActor when their hosted scripts call into
|
||||
/// the runtime's logging facade; consumed by <c>ScriptLogSignalRBridge</c> for live
|
||||
/// browser tail-style viewing.
|
||||
/// </summary>
|
||||
/// <param name="ScriptId">The Script row this entry came from (matches <c>Script.ScriptId</c>).</param>
|
||||
/// <param name="Level">"Trace" / "Debug" / "Information" / "Warning" / "Error" / "Critical" — Serilog levels.</param>
|
||||
/// <param name="Message">Operator-facing log message; template tokens already resolved.</param>
|
||||
/// <param name="TimestampUtc">When the script emitted the entry.</param>
|
||||
/// <param name="VirtualTagId">VirtualTag context, if logged from a virtual tag evaluation. Null otherwise.</param>
|
||||
/// <param name="AlarmId">ScriptedAlarm context, if logged from an alarm predicate. Null otherwise.</param>
|
||||
/// <param name="EquipmentId">Equipment scope, if the script ran in a per-equipment context. Null for fleet-wide scripts.</param>
|
||||
public sealed record ScriptLogEntry(
|
||||
string ScriptId,
|
||||
string Level,
|
||||
string Message,
|
||||
DateTime TimestampUtc,
|
||||
string? VirtualTagId,
|
||||
string? AlarmId,
|
||||
string? EquipmentId);
|
||||
@@ -0,0 +1,16 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Redundancy;
|
||||
|
||||
public enum RedundancyRole { Primary, Secondary, Detached }
|
||||
|
||||
/// <summary>
|
||||
/// Snapshot of a single node's redundancy state. Aggregated by <c>RedundancyStateActor</c>
|
||||
/// to compute fleet-wide ServiceLevel.
|
||||
/// </summary>
|
||||
public sealed record NodeRedundancyState(
|
||||
NodeId NodeId,
|
||||
RedundancyRole Role,
|
||||
bool IsClusterLeader,
|
||||
bool IsRoleLeaderForDriver,
|
||||
DateTime AsOfUtc);
|
||||
@@ -0,0 +1,11 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Redundancy;
|
||||
|
||||
/// <summary>
|
||||
/// Broadcast whenever the cluster's redundancy topology changes (node up/down, role-leader
|
||||
/// change, partition heal). Subscribers compute their local OPC UA ServiceLevel from this.
|
||||
/// </summary>
|
||||
public sealed record RedundancyStateChanged(
|
||||
IReadOnlyList<NodeRedundancyState> Nodes,
|
||||
CorrelationId CorrelationId);
|
||||
@@ -0,0 +1,81 @@
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.Metrics;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Observability;
|
||||
|
||||
/// <summary>
|
||||
/// Central <see cref="Meter"/> + <see cref="ActivitySource"/> definitions for OtOpcUa.
|
||||
/// All Akka actors, the OPC UA publish path, and the deploy coordinator emit through these
|
||||
/// pre-created instruments so a single OpenTelemetry / Prometheus binding in <c>Host</c>
|
||||
/// catches everything. No exporter is required — instruments are no-op until a listener
|
||||
/// attaches, so tests and dev hosts pay nothing for instrumentation that nobody scrapes.
|
||||
///
|
||||
/// Instrument names follow the OpenTelemetry semantic convention pattern
|
||||
/// <c>otopcua.<subsystem>.<event></c>. Subsystem is one of: deploy, driver,
|
||||
/// virtualtag, scriptedalarm, opcua, redundancy.
|
||||
/// </summary>
|
||||
public static class OtOpcUaTelemetry
|
||||
{
|
||||
public const string MeterName = "ZB.MOM.WW.OtOpcUa";
|
||||
public const string ActivitySourceName = "ZB.MOM.WW.OtOpcUa";
|
||||
|
||||
/// <summary>Singleton <see cref="Meter"/> all counters/histograms hang off.</summary>
|
||||
public static readonly Meter Meter = new(MeterName);
|
||||
|
||||
/// <summary>Singleton <see cref="ActivitySource"/> used to start spans wrapping deploy/apply/rebuild.</summary>
|
||||
public static readonly ActivitySource ActivitySource = new(ActivitySourceName);
|
||||
|
||||
// ---------------- Deployment / driver-host coordination ----------------
|
||||
|
||||
/// <summary>Incremented every time DriverHostActor finishes applying a deployment (Ack or Reject).</summary>
|
||||
public static readonly Counter<long> DeploymentApplied =
|
||||
Meter.CreateCounter<long>("otopcua.deploy.applied", unit: "{deployment}",
|
||||
description: "Deployments applied by a driver-role node (outcome=ack|reject).");
|
||||
|
||||
/// <summary>Time from DriverHostActor receiving DispatchDeployment to emitting the ack/reject.</summary>
|
||||
public static readonly Histogram<double> DeploymentApplyDurationSec =
|
||||
Meter.CreateHistogram<double>("otopcua.deploy.apply.duration", unit: "s",
|
||||
description: "Driver-role apply latency from DispatchDeployment → Ack/Reject.");
|
||||
|
||||
/// <summary>DriverInstanceActor spawn count (added=new instance; stop=disposed).</summary>
|
||||
public static readonly Counter<long> DriverInstanceLifecycle =
|
||||
Meter.CreateCounter<long>("otopcua.driver.lifecycle", unit: "{event}",
|
||||
description: "DriverInstanceActor lifecycle transitions (event=spawn|stop|fault).");
|
||||
|
||||
// ---------------- VirtualTag / ScriptedAlarm engines ----------------
|
||||
|
||||
public static readonly Counter<long> VirtualTagEval =
|
||||
Meter.CreateCounter<long>("otopcua.virtualtag.eval", unit: "{eval}",
|
||||
description: "Virtual-tag evaluations attempted (outcome=ok|fail|skip).");
|
||||
|
||||
public static readonly Counter<long> ScriptedAlarmTransition =
|
||||
Meter.CreateCounter<long>("otopcua.scriptedalarm.transition", unit: "{transition}",
|
||||
description: "Scripted-alarm state transitions (state=active|acknowledged|inactive).");
|
||||
|
||||
// ---------------- OPC UA address-space + redundancy ----------------
|
||||
|
||||
public static readonly Counter<long> OpcUaSinkWrite =
|
||||
Meter.CreateCounter<long>("otopcua.opcua.sink.write", unit: "{write}",
|
||||
description: "Writes that landed in IOpcUaAddressSpaceSink (kind=value|alarm|rebuild).");
|
||||
|
||||
public static readonly Counter<long> ServiceLevelChange =
|
||||
Meter.CreateCounter<long>("otopcua.redundancy.service_level_change", unit: "{change}",
|
||||
description: "OPC UA Server.ServiceLevel transitions emitted by the redundancy state.");
|
||||
|
||||
// ---------------- Convenience helpers ----------------
|
||||
|
||||
/// <summary>
|
||||
/// Starts a deploy span tagged with the deployment id. Caller disposes to close. Returns
|
||||
/// null when no listener is attached so the call site stays cheap on undecorated builds.
|
||||
/// </summary>
|
||||
public static Activity? StartDeployApplySpan(string deploymentId)
|
||||
{
|
||||
var activity = ActivitySource.StartActivity("otopcua.deploy.apply", ActivityKind.Internal);
|
||||
activity?.SetTag("otopcua.deployment_id", deploymentId);
|
||||
return activity;
|
||||
}
|
||||
|
||||
/// <summary>Span wrapping a full OPC UA address-space rebuild (Phase7 plan → apply).</summary>
|
||||
public static Activity? StartAddressSpaceRebuildSpan()
|
||||
=> ActivitySource.StartActivity("otopcua.opcua.address_space_rebuild", ActivityKind.Internal);
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Wrapper <see cref="IOpcUaAddressSpaceSink"/> that defers to an inner sink swapped in at
|
||||
/// runtime. Needed because the production sink (<c>SdkAddressSpaceSink</c>) wraps an
|
||||
/// <c>OtOpcUaNodeManager</c> that only exists after the SDK <c>StandardServer</c> has
|
||||
/// started — but Akka actors resolve their sink dependency at construction time, before
|
||||
/// the hosted service has booted the SDK.
|
||||
///
|
||||
/// Bound as a singleton in DI on driver-role hosts; the OPC UA hosted service calls
|
||||
/// <see cref="SetSink"/> once the server is up. Until that swap happens, every call is a
|
||||
/// no-op against <see cref="NullOpcUaAddressSpaceSink"/>, so the actor stays safe to
|
||||
/// receive messages from the moment it boots.
|
||||
/// </summary>
|
||||
public sealed class DeferredAddressSpaceSink : IOpcUaAddressSpaceSink
|
||||
{
|
||||
private volatile IOpcUaAddressSpaceSink _inner = NullOpcUaAddressSpaceSink.Instance;
|
||||
|
||||
/// <summary>Swap in the production sink. Pass <c>null</c> to revert to the null sink
|
||||
/// (used during graceful shutdown so post-stop writes don't hit a half-disposed manager).</summary>
|
||||
public void SetSink(IOpcUaAddressSpaceSink? sink) =>
|
||||
_inner = sink ?? NullOpcUaAddressSpaceSink.Instance;
|
||||
|
||||
public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc)
|
||||
=> _inner.WriteValue(nodeId, value, quality, sourceTimestampUtc);
|
||||
|
||||
public void WriteAlarmState(string alarmNodeId, bool active, bool acknowledged, DateTime sourceTimestampUtc)
|
||||
=> _inner.WriteAlarmState(alarmNodeId, active, acknowledged, sourceTimestampUtc);
|
||||
|
||||
public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName)
|
||||
=> _inner.EnsureFolder(folderNodeId, parentNodeId, displayName);
|
||||
|
||||
public void RebuildAddressSpace() => _inner.RebuildAddressSpace();
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Late-binding adapter that holds an inner <see cref="IServiceLevelPublisher"/> reference
|
||||
/// swappable at runtime. Mirrors <see cref="DeferredAddressSpaceSink"/>: Akka actors resolve
|
||||
/// the publisher at DI time, but the production <c>SdkServiceLevelPublisher</c> only exists
|
||||
/// after <c>StandardServer.Start</c>. The Host's hosted service swaps the inner once the SDK
|
||||
/// is up; until then writes route through <see cref="NullServiceLevelPublisher"/>.
|
||||
/// </summary>
|
||||
public sealed class DeferredServiceLevelPublisher : IServiceLevelPublisher
|
||||
{
|
||||
private volatile IServiceLevelPublisher _inner = NullServiceLevelPublisher.Instance;
|
||||
|
||||
/// <summary>Swap the underlying publisher. Pass null to revert to the Null no-op.</summary>
|
||||
public void SetInner(IServiceLevelPublisher? inner) =>
|
||||
_inner = inner ?? NullServiceLevelPublisher.Instance;
|
||||
|
||||
public void Publish(byte serviceLevel) => _inner.Publish(serviceLevel);
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Abstraction over the OPC UA SDK's address space. <c>OpcUaPublishActor</c> consumes this
|
||||
/// so the Runtime project doesn't reference <c>Opc.Ua.Server</c> directly — production
|
||||
/// binds a real SDK-backed sink in the fused Host's wiring, dev/Mac binds the
|
||||
/// <see cref="NullOpcUaAddressSpaceSink"/> no-op.
|
||||
/// </summary>
|
||||
public interface IOpcUaAddressSpaceSink
|
||||
{
|
||||
/// <summary>Write a Variable node's current value + quality + source timestamp.</summary>
|
||||
void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc);
|
||||
|
||||
/// <summary>Write an alarm-condition Variable's active/acknowledged state.</summary>
|
||||
void WriteAlarmState(string alarmNodeId, bool active, bool acknowledged, DateTime sourceTimestampUtc);
|
||||
|
||||
/// <summary>
|
||||
/// Ensure a folder node exists under the given parent. Used by <c>Phase7Applier</c> to
|
||||
/// materialise the UNS Area/Line/Equipment hierarchy in the address space. When
|
||||
/// <paramref name="parentNodeId"/> is null the folder is parented under the namespace
|
||||
/// root. Idempotent: calling twice with the same id is safe.
|
||||
/// </summary>
|
||||
void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName);
|
||||
|
||||
/// <summary>
|
||||
/// Tear down + repopulate the address space. Called by <c>OpcUaPublishActor</c> after a
|
||||
/// successful deployment apply so the node manager reflects the new config. Idempotent.
|
||||
/// </summary>
|
||||
void RebuildAddressSpace();
|
||||
}
|
||||
|
||||
/// <summary>OPC UA status code projection — Good / Uncertain / Bad. Real SDK has finer-grained
|
||||
/// codes; the engine actors only need this 3-state classification.</summary>
|
||||
public enum OpcUaQuality { Good, Uncertain, Bad }
|
||||
|
||||
/// <summary>No-op sink. Bound by default so the actors are safe to run in dev / Mac /
|
||||
/// integration tests without a real SDK behind them.</summary>
|
||||
public sealed class NullOpcUaAddressSpaceSink : IOpcUaAddressSpaceSink
|
||||
{
|
||||
public static readonly NullOpcUaAddressSpaceSink Instance = new();
|
||||
private NullOpcUaAddressSpaceSink() { }
|
||||
public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc) { }
|
||||
public void WriteAlarmState(string alarmNodeId, bool active, bool acknowledged, DateTime sourceTimestampUtc) { }
|
||||
public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName) { }
|
||||
public void RebuildAddressSpace() { }
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Writes the OPC UA Server object's <c>ServiceLevel</c> Variable (0–255). Production binds
|
||||
/// a sink that pokes the SDK's ServiceLevel node; tests + dev mode bind
|
||||
/// <see cref="NullServiceLevelPublisher"/> which just records the most recently set level
|
||||
/// for inspection.
|
||||
/// </summary>
|
||||
public interface IServiceLevelPublisher
|
||||
{
|
||||
void Publish(byte serviceLevel);
|
||||
}
|
||||
|
||||
/// <summary>No-op default that retains the last-written ServiceLevel in
|
||||
/// <see cref="LastPublished"/>. Used by dev mode + verified by tests.</summary>
|
||||
public sealed class NullServiceLevelPublisher : IServiceLevelPublisher
|
||||
{
|
||||
public static readonly NullServiceLevelPublisher Instance = new();
|
||||
private NullServiceLevelPublisher() { }
|
||||
public byte LastPublished { get; private set; }
|
||||
public void Publish(byte serviceLevel) => LastPublished = serviceLevel;
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
public readonly record struct CorrelationId(Guid Value)
|
||||
{
|
||||
public static CorrelationId NewId() => new(Guid.NewGuid());
|
||||
public override string ToString() => Value.ToString("N");
|
||||
public static CorrelationId Parse(string s) => new(Guid.ParseExact(s, "N"));
|
||||
public static bool TryParse(string? s, out CorrelationId id)
|
||||
{
|
||||
if (Guid.TryParseExact(s, "N", out var g)) { id = new CorrelationId(g); return true; }
|
||||
id = default; return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
public readonly record struct DeploymentId(Guid Value)
|
||||
{
|
||||
public static DeploymentId NewId() => new(Guid.NewGuid());
|
||||
public override string ToString() => Value.ToString("N");
|
||||
public static DeploymentId Parse(string s) => new(Guid.ParseExact(s, "N"));
|
||||
public static bool TryParse(string? s, out DeploymentId id)
|
||||
{
|
||||
if (Guid.TryParseExact(s, "N", out var g)) { id = new DeploymentId(g); return true; }
|
||||
id = default; return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
public readonly record struct ExecutionId(Guid Value)
|
||||
{
|
||||
public static ExecutionId NewId() => new(Guid.NewGuid());
|
||||
public override string ToString() => Value.ToString("N");
|
||||
public static ExecutionId Parse(string s) => new(Guid.ParseExact(s, "N"));
|
||||
public static bool TryParse(string? s, out ExecutionId id)
|
||||
{
|
||||
if (Guid.TryParseExact(s, "N", out var g)) { id = new ExecutionId(g); return true; }
|
||||
id = default; return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
/// <summary>
|
||||
/// Logical cluster node identifier — typically the host name configured on a fused
|
||||
/// <c>OtOpcUa.Host</c> instance. NOT to be confused with OPC UA <c>NodeId</c> from the
|
||||
/// Opc.Ua.Core stack.
|
||||
/// </summary>
|
||||
public readonly record struct NodeId(string Value)
|
||||
{
|
||||
public override string ToString() => Value;
|
||||
public static NodeId Parse(string s) =>
|
||||
string.IsNullOrWhiteSpace(s)
|
||||
? throw new ArgumentException("NodeId value cannot be empty.", nameof(s))
|
||||
: new NodeId(s);
|
||||
public static bool TryParse(string? s, out NodeId id)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(s)) { id = new NodeId(s); return true; }
|
||||
id = default; return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
|
||||
/// <summary>
|
||||
/// SHA-256 hex digest identifying a config snapshot revision. Storage form is lowercase
|
||||
/// 64-char hex (no <c>0x</c> prefix). Empty hash is invalid.
|
||||
/// </summary>
|
||||
public readonly record struct RevisionHash(string Value)
|
||||
{
|
||||
public override string ToString() => Value;
|
||||
public static RevisionHash Parse(string s) =>
|
||||
string.IsNullOrWhiteSpace(s)
|
||||
? throw new ArgumentException("RevisionHash value cannot be empty.", nameof(s))
|
||||
: new RevisionHash(s);
|
||||
public static bool TryParse(string? s, out RevisionHash hash)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(s)) { hash = new RevisionHash(s); return true; }
|
||||
hash = default; return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Commons</RootNamespace>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Akka"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -1,19 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Apply;
|
||||
|
||||
/// <summary>
|
||||
/// Host-supplied callbacks invoked as the applier walks the diff. Callbacks are idempotent on
|
||||
/// retry (the applier may re-invoke with the same inputs if a later stage fails — nodes
|
||||
/// register-applied to the central DB only after success). Order: namespace → driver → device →
|
||||
/// equipment → poll group → tag, with Removed before Added/Modified.
|
||||
/// </summary>
|
||||
public sealed class ApplyCallbacks
|
||||
{
|
||||
public Func<EntityChange<Namespace>, CancellationToken, Task>? OnNamespace { get; init; }
|
||||
public Func<EntityChange<DriverInstance>, CancellationToken, Task>? OnDriver { get; init; }
|
||||
public Func<EntityChange<Device>, CancellationToken, Task>? OnDevice { get; init; }
|
||||
public Func<EntityChange<Equipment>, CancellationToken, Task>? OnEquipment { get; init; }
|
||||
public Func<EntityChange<PollGroup>, CancellationToken, Task>? OnPollGroup { get; init; }
|
||||
public Func<EntityChange<Tag>, CancellationToken, Task>? OnTag { get; init; }
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Apply;
|
||||
|
||||
public enum ChangeKind
|
||||
{
|
||||
Added,
|
||||
Removed,
|
||||
Modified,
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Validation;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Apply;
|
||||
|
||||
public sealed class GenerationApplier(ApplyCallbacks callbacks) : IGenerationApplier
|
||||
{
|
||||
public async Task<ApplyResult> ApplyAsync(DraftSnapshot? from, DraftSnapshot to, CancellationToken ct)
|
||||
{
|
||||
var diff = GenerationDiffer.Compute(from, to);
|
||||
var errors = new List<string>();
|
||||
|
||||
// Removed first, then Added/Modified — prevents FK dangling while cascades settle.
|
||||
await ApplyPass(diff.Tags, ChangeKind.Removed, callbacks.OnTag, errors, ct);
|
||||
await ApplyPass(diff.PollGroups, ChangeKind.Removed, callbacks.OnPollGroup, errors, ct);
|
||||
await ApplyPass(diff.Equipment, ChangeKind.Removed, callbacks.OnEquipment, errors, ct);
|
||||
await ApplyPass(diff.Devices, ChangeKind.Removed, callbacks.OnDevice, errors, ct);
|
||||
await ApplyPass(diff.Drivers, ChangeKind.Removed, callbacks.OnDriver, errors, ct);
|
||||
await ApplyPass(diff.Namespaces, ChangeKind.Removed, callbacks.OnNamespace, errors, ct);
|
||||
|
||||
foreach (var kind in new[] { ChangeKind.Added, ChangeKind.Modified })
|
||||
{
|
||||
// Honour cancellation between passes — a caller can abort the apply between Removed
|
||||
// and Added phases even if individual callbacks don't observe the token themselves
|
||||
// (Configuration-007).
|
||||
ct.ThrowIfCancellationRequested();
|
||||
await ApplyPass(diff.Namespaces, kind, callbacks.OnNamespace, errors, ct);
|
||||
await ApplyPass(diff.Drivers, kind, callbacks.OnDriver, errors, ct);
|
||||
await ApplyPass(diff.Devices, kind, callbacks.OnDevice, errors, ct);
|
||||
await ApplyPass(diff.Equipment, kind, callbacks.OnEquipment, errors, ct);
|
||||
await ApplyPass(diff.PollGroups, kind, callbacks.OnPollGroup, errors, ct);
|
||||
await ApplyPass(diff.Tags, kind, callbacks.OnTag, errors, ct);
|
||||
}
|
||||
|
||||
return errors.Count == 0 ? ApplyResult.Ok(diff) : ApplyResult.Fail(diff, errors);
|
||||
}
|
||||
|
||||
private static async Task ApplyPass<T>(
|
||||
IReadOnlyList<EntityChange<T>> changes,
|
||||
ChangeKind kind,
|
||||
Func<EntityChange<T>, CancellationToken, Task>? callback,
|
||||
List<string> errors,
|
||||
CancellationToken ct)
|
||||
{
|
||||
if (callback is null) return;
|
||||
|
||||
foreach (var change in changes.Where(c => c.Kind == kind))
|
||||
{
|
||||
try { await callback(change, ct); }
|
||||
// Configuration-007: cancellation must propagate, not be silently recorded as an
|
||||
// entity error. Distinguish caller cancellation (token signalled) from any
|
||||
// OperationCanceledException raised independently of the caller's token, which we
|
||||
// still want to surface as an entity error so a single misbehaving callback does
|
||||
// not crash the entire apply.
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
|
||||
catch (Exception ex) { errors.Add($"{typeof(T).Name} {change.Kind} '{change.LogicalId}': {ex.Message}"); }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Validation;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Apply;
|
||||
|
||||
/// <summary>
|
||||
/// Per-entity diff computed locally on the node. The enumerable order matches the dependency
|
||||
/// order expected by <see cref="IGenerationApplier"/>: namespace → driver → device → equipment →
|
||||
/// poll group → tag → ACL, with Removed processed before Added inside each bucket so cascades
|
||||
/// settle before new rows appear.
|
||||
/// </summary>
|
||||
public sealed record GenerationDiff(
|
||||
IReadOnlyList<EntityChange<Namespace>> Namespaces,
|
||||
IReadOnlyList<EntityChange<DriverInstance>> Drivers,
|
||||
IReadOnlyList<EntityChange<Device>> Devices,
|
||||
IReadOnlyList<EntityChange<Equipment>> Equipment,
|
||||
IReadOnlyList<EntityChange<PollGroup>> PollGroups,
|
||||
IReadOnlyList<EntityChange<Tag>> Tags);
|
||||
|
||||
public sealed record EntityChange<T>(ChangeKind Kind, string LogicalId, T? From, T? To);
|
||||
|
||||
public static class GenerationDiffer
|
||||
{
|
||||
public static GenerationDiff Compute(DraftSnapshot? from, DraftSnapshot to)
|
||||
{
|
||||
from ??= new DraftSnapshot { GenerationId = 0, ClusterId = to.ClusterId };
|
||||
|
||||
return new GenerationDiff(
|
||||
Namespaces: DiffById(from.Namespaces, to.Namespaces, x => x.NamespaceId,
|
||||
(a, b) => (a.ClusterId, a.NamespaceUri, a.Kind, a.Enabled, a.Notes)
|
||||
== (b.ClusterId, b.NamespaceUri, b.Kind, b.Enabled, b.Notes)),
|
||||
Drivers: DiffById(from.DriverInstances, to.DriverInstances, x => x.DriverInstanceId,
|
||||
(a, b) => (a.ClusterId, a.NamespaceId, a.Name, a.DriverType, a.Enabled, a.DriverConfig)
|
||||
== (b.ClusterId, b.NamespaceId, b.Name, b.DriverType, b.Enabled, b.DriverConfig)),
|
||||
Devices: DiffById(from.Devices, to.Devices, x => x.DeviceId,
|
||||
(a, b) => (a.DriverInstanceId, a.Name, a.Enabled, a.DeviceConfig)
|
||||
== (b.DriverInstanceId, b.Name, b.Enabled, b.DeviceConfig)),
|
||||
Equipment: DiffById(from.Equipment, to.Equipment, x => x.EquipmentId,
|
||||
(a, b) => (a.EquipmentUuid, a.DriverInstanceId, a.UnsLineId, a.Name, a.MachineCode, a.ZTag, a.SAPID, a.Enabled)
|
||||
== (b.EquipmentUuid, b.DriverInstanceId, b.UnsLineId, b.Name, b.MachineCode, b.ZTag, b.SAPID, b.Enabled)),
|
||||
PollGroups: DiffById(from.PollGroups, to.PollGroups, x => x.PollGroupId,
|
||||
(a, b) => (a.DriverInstanceId, a.Name, a.IntervalMs)
|
||||
== (b.DriverInstanceId, b.Name, b.IntervalMs)),
|
||||
Tags: DiffById(from.Tags, to.Tags, x => x.TagId,
|
||||
(a, b) => (a.DriverInstanceId, a.DeviceId, a.EquipmentId, a.PollGroupId, a.FolderPath, a.Name, a.DataType, a.AccessLevel, a.WriteIdempotent, a.TagConfig)
|
||||
== (b.DriverInstanceId, b.DeviceId, b.EquipmentId, b.PollGroupId, b.FolderPath, b.Name, b.DataType, b.AccessLevel, b.WriteIdempotent, b.TagConfig)));
|
||||
}
|
||||
|
||||
private static List<EntityChange<T>> DiffById<T>(
|
||||
IReadOnlyList<T> from, IReadOnlyList<T> to,
|
||||
Func<T, string> id, Func<T, T, bool> equal)
|
||||
{
|
||||
var fromById = from.ToDictionary(id);
|
||||
var toById = to.ToDictionary(id);
|
||||
var result = new List<EntityChange<T>>();
|
||||
|
||||
foreach (var (logicalId, src) in fromById.Where(kv => !toById.ContainsKey(kv.Key)))
|
||||
result.Add(new(ChangeKind.Removed, logicalId, src, default));
|
||||
|
||||
foreach (var (logicalId, dst) in toById)
|
||||
{
|
||||
if (!fromById.TryGetValue(logicalId, out var src))
|
||||
result.Add(new(ChangeKind.Added, logicalId, default, dst));
|
||||
else if (!equal(src, dst))
|
||||
result.Add(new(ChangeKind.Modified, logicalId, src, dst));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Validation;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Apply;
|
||||
|
||||
/// <summary>
|
||||
/// Applies a <see cref="GenerationDiff"/> to whatever backing runtime the node owns: the OPC UA
|
||||
/// address space, driver subscriptions, the local cache, etc. The Core project wires concrete
|
||||
/// callbacks into this via <see cref="ApplyCallbacks"/> so the Configuration project stays free
|
||||
/// of a Core/Server dependency (interface independence per decision #59).
|
||||
/// </summary>
|
||||
public interface IGenerationApplier
|
||||
{
|
||||
Task<ApplyResult> ApplyAsync(DraftSnapshot? from, DraftSnapshot to, CancellationToken ct);
|
||||
}
|
||||
|
||||
public sealed record ApplyResult(
|
||||
bool Succeeded,
|
||||
GenerationDiff Diff,
|
||||
IReadOnlyList<string> Errors)
|
||||
{
|
||||
public static ApplyResult Ok(GenerationDiff diff) => new(true, diff, []);
|
||||
public static ApplyResult Fail(GenerationDiff diff, IReadOnlyList<string> errors) => new(false, diff, errors);
|
||||
}
|
||||
@@ -1,5 +1,3 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
/// <summary>Physical OPC UA server node within a <see cref="ServerCluster"/>.</summary>
|
||||
@@ -10,8 +8,6 @@ public sealed class ClusterNode
|
||||
|
||||
public required string ClusterId { get; set; }
|
||||
|
||||
public required RedundancyRole RedundancyRole { get; set; }
|
||||
|
||||
/// <summary>Machine hostname / IP.</summary>
|
||||
public required string Host { get; set; }
|
||||
|
||||
@@ -47,5 +43,4 @@ public sealed class ClusterNode
|
||||
// Navigation
|
||||
public ServerCluster? Cluster { get; set; }
|
||||
public ICollection<ClusterNodeCredential> Credentials { get; set; } = [];
|
||||
public ClusterNodeGenerationState? GenerationState { get; set; }
|
||||
}
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
/// <summary>
|
||||
/// Tracks which generation each node has applied. Per-node (not per-cluster) — both nodes of a
|
||||
/// 2-node cluster track independently per decision #84.
|
||||
/// </summary>
|
||||
public sealed class ClusterNodeGenerationState
|
||||
{
|
||||
public required string NodeId { get; set; }
|
||||
|
||||
public long? CurrentGenerationId { get; set; }
|
||||
|
||||
public DateTime? LastAppliedAt { get; set; }
|
||||
|
||||
public NodeApplyStatus? LastAppliedStatus { get; set; }
|
||||
|
||||
public string? LastAppliedError { get; set; }
|
||||
|
||||
/// <summary>Updated on every poll for liveness detection.</summary>
|
||||
public DateTime? LastSeenAt { get; set; }
|
||||
|
||||
public ClusterNode? Node { get; set; }
|
||||
public ConfigGeneration? CurrentGeneration { get; set; }
|
||||
}
|
||||
@@ -22,4 +22,16 @@ public sealed class ConfigAuditLog
|
||||
public long? GenerationId { get; set; }
|
||||
|
||||
public string? DetailsJson { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Stable per-event identifier from <c>AuditEvent.EventId</c>. Filtered unique index on
|
||||
/// this column gives cross-restart idempotency for the batched AuditWriterActor: a flush
|
||||
/// that retries after a process crash can re-send the same EventId without producing a
|
||||
/// duplicate row. Nullable so pre-v2 rows backfill cleanly.
|
||||
/// </summary>
|
||||
public Guid? EventId { get; set; }
|
||||
|
||||
/// <summary>Correlation ID from <c>AuditEvent.CorrelationId</c> so an audit row joins to its
|
||||
/// originating request/workflow. Nullable for the same backfill reason as <see cref="EventId"/>.</summary>
|
||||
public Guid? CorrelationId { get; set; }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
/// <summary>
|
||||
/// Append-only audit row written by AdminOperationsActor on every mutating live-edit
|
||||
/// operation. The ExecutionId optionally correlates a sequence of edits that ran inside one
|
||||
/// admin transaction (e.g. an import batch that updates many Equipment rows).
|
||||
/// </summary>
|
||||
public sealed class ConfigEdit
|
||||
{
|
||||
public Guid EditId { get; init; } = Guid.NewGuid();
|
||||
|
||||
public required string EntityType { get; init; }
|
||||
|
||||
public Guid EntityId { get; init; }
|
||||
|
||||
/// <summary>JSON payload of the column-name → new-value pairs touched by this edit.</summary>
|
||||
public required string FieldsJson { get; init; }
|
||||
|
||||
/// <summary>Optional correlation across edits inside a single admin operation.</summary>
|
||||
public Guid? ExecutionId { get; init; }
|
||||
|
||||
public required string EditedBy { get; init; }
|
||||
|
||||
public DateTime EditedAtUtc { get; init; } = DateTime.UtcNow;
|
||||
|
||||
public required string SourceNode { get; init; }
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user