Compare commits
17 Commits
9a67ebc8a8
...
1c579410cd
| Author | SHA1 | Date | |
|---|---|---|---|
| 1c579410cd | |||
| b0a62a9f3b | |||
| 1f76eac97a | |||
| b45e0be427 | |||
| e4a3f07c99 | |||
| b88ae5db10 | |||
| ec9599e234 | |||
| 8ce57e47a3 | |||
| 1b7f995aea | |||
| 4fca4e1aca | |||
| 7b2f64fdb8 | |||
| 05471dc36c | |||
| 7bba86b2af | |||
| 5f48f81d5a | |||
| 24796f2c12 | |||
| 7bec2fd4db | |||
| ab8900eee5 |
+17
-1
@@ -1,6 +1,7 @@
|
||||
# Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml
|
||||
# to spin four host containers (admin-a, admin-b, driver-a, driver-b) from a single image —
|
||||
# to spin six host containers (central-1, central-2, site-a-1, site-a-2, site-b-1, site-b-2) from a single image —
|
||||
# Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them.
|
||||
# A separate `migrator` stage (below) applies EF migrations once on bring-up.
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
WORKDIR /src
|
||||
@@ -18,3 +19,18 @@ EXPOSE 4053
|
||||
EXPOSE 4840
|
||||
|
||||
ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"]
|
||||
|
||||
# ── Migrator (one-shot) ──────────────────────────────────────────────────────
|
||||
# Applies EF Core migrations to the ConfigDb so a fresh SQL volume gets the schema
|
||||
# with no operator step. docker-dev compose runs this once, before cluster-seed +
|
||||
# the host nodes (they depend on it via service_completed_successfully). The host
|
||||
# nodes deliberately do NOT auto-migrate (production owns schema changes), so this
|
||||
# rig-only stage carries that responsibility. The connection comes from the
|
||||
# OTOPCUA_CONFIG_CONNECTION env var read by DesignTimeDbContextFactory.
|
||||
FROM build AS migrator
|
||||
RUN dotnet tool install --global dotnet-ef --version 10.0.7
|
||||
ENV PATH="${PATH}:/root/.dotnet/tools"
|
||||
WORKDIR /src
|
||||
ENTRYPOINT ["dotnet", "ef", "database", "update", \
|
||||
"--project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration", \
|
||||
"--startup-project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration"]
|
||||
|
||||
+41
-38
@@ -1,6 +1,6 @@
|
||||
# docker-dev
|
||||
|
||||
Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **three isolated Akka clusters** + SQL Server + Traefik on the same Compose network. All three clusters share the single `OtOpcUa` ConfigDb — multi-tenancy is enforced by per-row `ServerCluster.ClusterId` scoping. Akka.Cluster gossip stays isolated between meshes because their seed-node lists are disjoint, even though they share the same system name `otopcua`.
|
||||
Mac-friendly OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **one single Akka mesh** (hub-and-spoke topology) + SQL Server + Traefik on the same Compose network. All six host nodes share the single `OtOpcUa` ConfigDb — logical separation between MAIN, SITE-A, and SITE-B is enforced by per-row `ServerCluster.ClusterId` scoping, not by mesh isolation.
|
||||
|
||||
## Stack
|
||||
|
||||
@@ -8,50 +8,48 @@ Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration sm
|
||||
|
||||
| Service | Role | Ports |
|
||||
|---|---|---|
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all three clusters | host `14330` → container `1433` |
|
||||
| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8089` |
|
||||
| `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all nodes | host `14330` → container `1433` |
|
||||
| `traefik` | Routes `:80` by PathPrefix to central admin nodes | host `80`, dashboard `8089` |
|
||||
|
||||
Authentication uses the **shared GLAuth** on the Linux Docker host at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). Every host container binds that instance via `cn=serviceaccount,dc=zb,dc=local`. `DevStubMode` is **not** active. Sign in as `multi-role` / `password` to get all three OtOpcUa roles (Administrator, Designer, Viewer), or use any other shared test user with password `password`. Group→role mappings are seeded by `seed/seed-clusters.sql` (`OtOpcUa-Admins`→Administrator, `OtOpcUa-Designers`→Designer, `OtOpcUa-Viewers`→Viewer). The shared GLAuth source of truth and deploy runbook live in `scadaproj/infra/glauth/`.
|
||||
Authentication uses the **shared GLAuth** on the Linux Docker host at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). Only the central admin nodes authenticate users. Sign in as `multi-role` / `password` to get all three OtOpcUa roles (Administrator, Designer, Viewer), or use any other shared test user with password `password`. Group→role mappings are seeded by `seed/seed-clusters.sql` (`OtOpcUa-Admins`→Administrator, `OtOpcUa-Designers`→Designer, `OtOpcUa-Viewers`→Viewer). The shared GLAuth source of truth and deploy runbook live in `scadaproj/infra/glauth/`.
|
||||
|
||||
### Main cluster — split admin/driver roles
|
||||
### Central nodes — fused admin+driver (MAIN cluster, UI + deploy singleton)
|
||||
|
||||
| Service | Role | Ports |
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `admin-a` | `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` |
|
||||
| `admin-b` | `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` |
|
||||
| `driver-a` | `OTOPCUA_ROLES=driver` | host `4840` → container `4840` |
|
||||
| `driver-b` | `OTOPCUA_ROLES=driver` | host `4841` → container `4840` |
|
||||
| `central-1` | `OTOPCUA_ROLES=admin,driver`, Akka mesh seed | host `4840` → container `4840`; internal `9000` |
|
||||
| `central-2` | `OTOPCUA_ROLES=admin,driver`, joins central-1 | host `4841` → container `4840`; internal `9000` |
|
||||
|
||||
### Site A cluster — 2-node fused admin+driver
|
||||
`central-1` and `central-2` are the **only** nodes that host the Admin UI and the deploy singleton. They are also the OPC UA publishers for the MAIN cluster. Traefik routes all `PathPrefix(/)` traffic to whichever central node has the leader role.
|
||||
|
||||
| Service | Role | Ports |
|
||||
### Site A nodes — driver-only (SITE-A cluster)
|
||||
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `site-a-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=admin,driver`, joins site-a-1 | host `4843` → container `4840` |
|
||||
| `site-a-1` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4842` → container `4840` |
|
||||
| `site-a-2` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4843` → container `4840` |
|
||||
|
||||
### Site B cluster — 2-node fused admin+driver
|
||||
### Site B nodes — driver-only (SITE-B cluster)
|
||||
|
||||
| Service | Role | Ports |
|
||||
| Service | Roles | Ports |
|
||||
|---|---|---|
|
||||
| `site-b-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=admin,driver`, joins site-b-1 | host `4845` → container `4840` |
|
||||
| `site-b-1` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4844` → container `4840` |
|
||||
| `site-b-2` | `OTOPCUA_ROLES=driver`, joins the single mesh | host `4845` → container `4840` |
|
||||
|
||||
All containers bind Akka remoting to port `4053` inside their own network namespace; the `PublicHostname` of each matches its Compose service name. Akka mesh isolation is enforced purely by disjoint seed lists. Configuration-side isolation is enforced by `ServerCluster.ClusterId` — see "Multi-tenancy" below.
|
||||
Site nodes serve no UI and authenticate no users. The central cluster manages and deploys to them over the shared Akka mesh. All six nodes bind Akka remoting to port `4053` inside their own network namespace; `PublicHostname` for each matches its Compose service name.
|
||||
|
||||
## Multi-tenancy
|
||||
|
||||
All eight host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three Akka meshes: each Akka cluster maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
All six host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three logical clusters: each maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope.
|
||||
|
||||
A one-shot `cluster-seed` Compose service (image `mcr.microsoft.com/mssql-tools`) waits for SQL + the EF auto-migration to complete and then INSERTs the rows below. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
Two one-shot Compose services bootstrap the DB on bring-up: `migrator` applies the EF Core migrations (so a fresh SQL volume gets the schema with no operator step — the host nodes deliberately do **not** auto-migrate, since production owns schema changes), then `cluster-seed` (image `mcr.microsoft.com/mssql-tools`) INSERTs the rows below. `cluster-seed` and every host node `depend_on` the `migrator` completing (`service_completed_successfully`), so the seed never races an in-progress migration. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops:
|
||||
|
||||
| Akka mesh | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
| Logical cluster | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows |
|
||||
|---|---|---|
|
||||
| Main | `MAIN` | `driver-a`, `driver-b` (OPC UA publishers) |
|
||||
| Main | `MAIN` | `central-1`, `central-2` (OPC UA publishers + admin UI) |
|
||||
| Site A | `SITE-A` | `site-a-1`, `site-a-2` |
|
||||
| Site B | `SITE-B` | `site-b-1`, `site-b-2` |
|
||||
|
||||
`ClusterNode` is the table for **OPC UA-publishing nodes** (not every Akka cluster member), which is why the main cluster's `admin-a` / `admin-b` don't get rows — they're control-plane-only.
|
||||
|
||||
Each `ClusterNode.NodeId` matches the node's `Cluster__PublicHostname` env value (Compose service name) — that's the lookup the runtime uses to resolve its own membership. `ApplicationUri` follows the `urn:OtOpcUa:<NodeId>` convention.
|
||||
|
||||
The SQL lives at `seed/seed-clusters.sql`; the wait-and-apply wrapper lives at `seed/entrypoint.sh`. To re-seed manually:
|
||||
@@ -72,21 +70,25 @@ The DriverHost actor doesn't spawn drivers from raw DriverInstance rows on its o
|
||||
# from the repo root
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
|
||||
# wait ~20 seconds for SQL to come up + all three clusters to form
|
||||
# the one-shot migrator + cluster-seed bootstrap the DB; watch the seed finish:
|
||||
docker compose -f docker-dev/docker-compose.yml logs -f cluster-seed # ^C once it prints "[cluster-seed] done."
|
||||
|
||||
open http://localhost # main cluster admin UI
|
||||
open http://site-a.localhost # site A admin UI
|
||||
open http://site-b.localhost # site B admin UI
|
||||
open http://localhost:9200 # Admin UI (Traefik → central-1 or central-2)
|
||||
open http://localhost:8089 # Traefik dashboard
|
||||
```
|
||||
|
||||
On macOS, `*.localhost` resolves to `127.0.0.1` automatically. On Linux add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't.
|
||||
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache.
|
||||
The first build takes a few minutes (.NET SDK image + restore + publish). **No manual schema step is needed** — on a fresh SQL volume the one-shot `migrator` service applies the EF migrations (the host nodes deliberately don't auto-migrate, since production owns schema changes), then `cluster-seed` populates the cluster/namespace/driver rows. `cluster-seed` and the host nodes wait for the migrator via `service_completed_successfully`, so nothing races an in-progress migration. A plain `docker compose ... up -d` on an existing volume is a fast no-op for both — the named SQL volume keeps the schema + rows across restarts; only `down -v` wipes them, after which the next `up` re-migrates + re-seeds automatically.
|
||||
|
||||
## Auth (dev only)
|
||||
|
||||
All host containers authenticate against the shared GLAuth at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). `DevStubMode` is **not** active. Sign in with any test user (password `password`); `multi-role` / `password` returns all three roles (Administrator, Designer, Viewer). Group→role mappings are seeded by `seed/seed-clusters.sql`. The GLAuth source of truth + deploy runbook is in `scadaproj/infra/glauth/`. **Do not** enable `DevStubMode` outside local debugging — production must always bind a real LDAP backend.
|
||||
Central nodes authenticate against the shared GLAuth at `10.100.0.35:3893` (baseDN `dc=zb,dc=local`). `DevStubMode` is **not** active. Sign in with any test user (password `password`); `multi-role` / `password` returns all three roles (Administrator, Designer, Viewer). Group→role mappings are seeded by `seed/seed-clusters.sql`. The GLAuth source of truth + deploy runbook is in `scadaproj/infra/glauth/`. **Do not** enable `DevStubMode` outside local debugging — production must always bind a real LDAP backend.
|
||||
|
||||
## Headless deploy
|
||||
|
||||
```bash
|
||||
POST http://localhost:9200/api/deployments
|
||||
X-Api-Key: docker-dev-deploy-key
|
||||
```
|
||||
|
||||
## Tear down
|
||||
|
||||
@@ -98,15 +100,16 @@ The `-v` drops the SQL volume; remove it to keep ConfigDb state across restarts.
|
||||
|
||||
## Failover smoke
|
||||
|
||||
1. Watch the Traefik dashboard at `http://localhost:8089`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it.
|
||||
1. Watch the Traefik dashboard at `http://localhost:8089`. Both `central-1` and `central-2` should be listed as healthy in the `otopcua-admin` service.
|
||||
2. `docker compose -f docker-dev/docker-compose.yml stop central-1` — `central-2` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `central-2` once its `/health/active` returns 200.
|
||||
3. `docker compose -f docker-dev/docker-compose.yml start central-1` — `central-1` rejoins as a follower; `central-2` keeps the leader role until something disturbs it.
|
||||
|
||||
## Notes
|
||||
|
||||
- This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings.
|
||||
- The OPC UA driver endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (driver-a), `opc.tcp://localhost:4841` (driver-b)
|
||||
- The OPC UA endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface):
|
||||
- Main: `opc.tcp://localhost:4840` (central-1), `opc.tcp://localhost:4841` (central-2)
|
||||
- Site A: `opc.tcp://localhost:4842` (site-a-1), `opc.tcp://localhost:4843` (site-a-2)
|
||||
- Site B: `opc.tcp://localhost:4844` (site-b-1), `opc.tcp://localhost:4845` (site-b-2)
|
||||
- Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success.
|
||||
- SQL persistence: ConfigDb state survives container restarts (named Docker volume). Drop the volume with `down -v` for a clean slate.
|
||||
|
||||
+152
-192
@@ -1,40 +1,46 @@
|
||||
# docker-dev/ — Mac-friendly multi-cluster fleet for v2 development + manual UI exercise.
|
||||
# docker-dev/ — Mac-friendly single-mesh hub-and-spoke fleet for v2 development + manual UI exercise.
|
||||
#
|
||||
# Stack (3 separate Akka clusters — all share the single `OtOpcUa` ConfigDb):
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb that all three clusters use
|
||||
# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in
|
||||
# Topology: ONE Akka mesh seeded by `central-1`. Logical separation between
|
||||
# tenants is by ServerCluster.ClusterId rows (MAIN / SITE-A / SITE-B) in the one
|
||||
# shared `OtOpcUa` ConfigDb — NOT by separate meshes. All six host nodes join the
|
||||
# same gossip ring and the central UI deploys to every cluster over it.
|
||||
#
|
||||
# Main cluster (existing — split-role admin / driver pair on a single Akka mesh):
|
||||
# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (seed)
|
||||
# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a)
|
||||
# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a)
|
||||
# Stack:
|
||||
# sql SQL Server 2022 — hosts the one ConfigDb every node uses
|
||||
# cluster-seed one-shot mssql-tools job that INSERTs the ServerCluster +
|
||||
# ClusterNode rows scoping each tenant, then exits (idempotent)
|
||||
#
|
||||
# Site A cluster (2-node fused admin+driver):
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=admin,driver, seed = site-a-1
|
||||
# central-1, central-2 OTOPCUA_ROLES=admin,driver — the ONLY UI + deploy
|
||||
# singleton, plus the MAIN cluster's OPC UA publishers.
|
||||
# Reachable at http://localhost:9200 (via Traefik).
|
||||
# central-1 is the Akka seed node; central-2 joins it.
|
||||
# site-a-1, site-a-2 OTOPCUA_ROLES=driver — driver-only members of the same
|
||||
# site-b-1, site-b-2 mesh, scoped to SITE-A / SITE-B by ClusterId. They
|
||||
# serve no UI and authenticate no users; the central
|
||||
# cluster manages and deploys to them over the mesh.
|
||||
#
|
||||
# Site B cluster (2-node fused admin+driver):
|
||||
# site-b-1, site-b-2 OTOPCUA_ROLES=admin,driver, seed = site-b-1
|
||||
# Auth is real LDAP against the shared GLAuth on the Linux Docker host
|
||||
# (10.100.0.35:3893, dc=zb,dc=local) — there is no LDAP container here.
|
||||
# Only the admin-role central nodes carry the Security__Ldap__* block.
|
||||
# Sign in `multi-role` / `password`.
|
||||
#
|
||||
# traefik PathPrefix → main cluster admin-a/admin-b; Host(`site-a.localhost`) →
|
||||
# site-a-*; Host(`site-b.localhost`) → site-b-*. Add the two site hosts to
|
||||
# your /etc/hosts (or rely on macOS `.localhost` auto-resolution).
|
||||
# traefik PathPrefix(`/`) → central-1 / central-2 (the single UI route).
|
||||
#
|
||||
# Multi-tenancy: ConfigDb is one schema with a `ServerCluster` table; each Akka cluster
|
||||
# corresponds to a row in it (ClusterId = "MAIN" / "SITE-A" / "SITE-B"), and each node's
|
||||
# `ClusterNode.NodeId` points back at the row that owns it. After first boot, sign in to
|
||||
# any cluster's Admin UI and create the matching ServerCluster + ClusterNode rows via
|
||||
# /clusters and /hosts so the runtime knows what configuration scope applies.
|
||||
# OPC UA endpoints (host-side port → container 4840):
|
||||
# central-1 :4840 central-2 :4841
|
||||
# site-a-1 :4842 site-a-2 :4843
|
||||
# site-b-1 :4844 site-b-2 :4845
|
||||
#
|
||||
# Akka mesh isolation: same system name "otopcua" + same remoting port 4053 inside each
|
||||
# container's own network namespace, but with disjoint seed-node lists — gossip never
|
||||
# crosses between the three meshes.
|
||||
# Headless deploy: POST http://localhost:9200/api/deployments with the
|
||||
# X-Api-Key header (Security__DeployApiKey = "docker-dev-deploy-key").
|
||||
#
|
||||
# SQL persistence: the otopcua-mssql-data named volume keeps the ConfigDb schema
|
||||
# + seeded clusters across `docker compose up` cycles; without it a recreate
|
||||
# silently drops the OtOpcUa database.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
# open http://localhost # main cluster Blazor admin UI
|
||||
# open http://site-a.localhost # site A admin UI
|
||||
# open http://site-b.localhost # site B admin UI
|
||||
# open http://localhost:9200 # central Blazor admin UI
|
||||
# open http://localhost:8089 # Traefik dashboard (8080 is the sister scadalink stack)
|
||||
#
|
||||
# Tear-down: docker compose -f docker-dev/docker-compose.yml down -v
|
||||
@@ -64,129 +70,68 @@ services:
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Waits for SQL + the host containers' EF auto-migration, then INSERTs the
|
||||
# three ServerCluster rows and the six ClusterNode rows that scope each Akka
|
||||
# mesh inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
# ── Migrator (one-shot) ────────────────────────────────────────────────────
|
||||
# Applies EF Core migrations to the OtOpcUa ConfigDb so a fresh SQL volume gets
|
||||
# the schema with no operator step (the host nodes deliberately don't auto-
|
||||
# migrate — production owns schema changes). cluster-seed + every host node
|
||||
# depend on this completing, so nothing races an in-progress migration.
|
||||
# Idempotent: a no-op once the schema is current.
|
||||
migrator:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
target: migrator
|
||||
image: otopcua-migrator:dev
|
||||
depends_on:
|
||||
sql:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OTOPCUA_CONFIG_CONNECTION: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
restart: "no"
|
||||
|
||||
# ── Cluster seed (one-shot) ────────────────────────────────────────────────
|
||||
# Runs only after `migrator` completes (so the schema is final — no race), then
|
||||
# INSERTs the three ServerCluster rows and the six ClusterNode rows that scope
|
||||
# each tenant inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops.
|
||||
cluster-seed:
|
||||
image: mcr.microsoft.com/mssql-tools:latest
|
||||
depends_on:
|
||||
migrator:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./seed:/seed:ro
|
||||
entrypoint: ["/bin/bash", "/seed/entrypoint.sh"]
|
||||
restart: "no"
|
||||
|
||||
# OpenLDAP was previously here but the bitnami/openldap:2.6 image was retired
|
||||
# (manifest gone) and bitnamilegacy/openldap:2.6 crashes during LDIF setup with
|
||||
# exit 68. For the dev compose every host container now runs with
|
||||
# Security__Ldap__DevStubMode=true, so any non-empty username/password
|
||||
# signs in as `Administrator`. Restore a real LDAP service when there's a need
|
||||
# for end-to-end LDAP coverage (the host code path is unchanged).
|
||||
# A local OpenLDAP container used to live here, but the bitnami/openldap:2.6
|
||||
# image was retired (manifest gone) and bitnamilegacy/openldap:2.6 crashes
|
||||
# during LDIF setup (exit 68). Rather than stub auth, the central (admin-role)
|
||||
# containers bind the shared GLAuth on the Linux Docker host (Security__Ldap__*
|
||||
# below: 10.100.0.35:3893, dc=zb,dc=local, DevStubMode=false) — so dev auth
|
||||
# exercises the real LDAP bind + group→role path. Sign in `multi-role` /
|
||||
# `password` (all roles) or any shared test user / `password`.
|
||||
|
||||
admin-a: &otopcua-host
|
||||
# ── Central cluster (2-node fused admin+driver) ─────────────────────────────
|
||||
# The only UI + deploy singleton; also the MAIN cluster's OPC UA publishers.
|
||||
# central-1 seeds the single Akka mesh that every other node joins.
|
||||
|
||||
central-1: &otopcua-host
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: docker-dev/Dockerfile
|
||||
target: runtime
|
||||
image: otopcua-host:dev
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
admin-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "admin-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
|
||||
driver-a:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-a"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
# Resolved at runtime by GalaxyDriver.ResolveApiKey when a DriverInstance's
|
||||
# Gateway.ApiKeySecretRef = "env:GALAXY_MXGW_API_KEY".
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
driver-b:
|
||||
<<: *otopcua-host
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "driver-b"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# Shares the OtOpcUa ConfigDb with the main + site-b clusters; multi-tenancy is
|
||||
# enforced by ServerCluster.ClusterId rows (configure via /clusters after boot).
|
||||
# Akka isolation comes from the disjoint seed list (seed = site-a-1).
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__PublicHostname: "central-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
@@ -203,6 +148,64 @@ services:
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4840:4840"
|
||||
|
||||
central-2:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "central-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4841:4840"
|
||||
|
||||
# ── Site A cluster (2-node driver-only) ─────────────────────────────────────
|
||||
# Driver-only members of the single mesh, scoped to SITE-A by ClusterId. No UI,
|
||||
# no user auth — managed + deployed to by the central cluster over the mesh.
|
||||
# All site nodes seed central-1.
|
||||
|
||||
site-a-1:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
# Resolved at runtime by GalaxyDriver.ResolveApiKey when a DriverInstance's
|
||||
# Gateway.ApiKeySecretRef = "env:GALAXY_MXGW_API_KEY".
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4842:4840"
|
||||
|
||||
@@ -210,61 +213,36 @@ services:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-a-1: { condition: service_started }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-a-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4843:4840"
|
||||
|
||||
# ── Site B cluster (2-node fused admin+driver) ──────────────────────────────
|
||||
# ── Site B cluster (2-node driver-only) ─────────────────────────────────────
|
||||
|
||||
site-b-1:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-1"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4844:4840"
|
||||
@@ -273,30 +251,16 @@ services:
|
||||
<<: *otopcua-host
|
||||
depends_on:
|
||||
sql: { condition: service_healthy }
|
||||
site-b-1: { condition: service_started }
|
||||
central-1: { condition: service_started }
|
||||
migrator: { condition: service_completed_successfully }
|
||||
environment:
|
||||
OTOPCUA_ROLES: "admin,driver"
|
||||
ASPNETCORE_URLS: "http://+:9000"
|
||||
OTOPCUA_ROLES: "driver"
|
||||
ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;"
|
||||
Cluster__Hostname: "0.0.0.0"
|
||||
Cluster__Port: "4053"
|
||||
Cluster__PublicHostname: "site-b-2"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053"
|
||||
Cluster__Roles__0: "admin"
|
||||
Cluster__Roles__1: "driver"
|
||||
Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345"
|
||||
Security__Jwt__Issuer: "otopcua-dev"
|
||||
Security__Jwt__Audience: "otopcua-dev"
|
||||
Security__Ldap__Enabled: "true"
|
||||
Security__Ldap__DevStubMode: "false"
|
||||
Security__Ldap__Server: "10.100.0.35"
|
||||
Security__Ldap__Port: "3893"
|
||||
Security__Ldap__Transport: "None"
|
||||
Security__Ldap__AllowInsecure: "true"
|
||||
Security__Ldap__SearchBase: "dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountDn: "cn=serviceaccount,dc=zb,dc=local"
|
||||
Security__Ldap__ServiceAccountPassword: "serviceaccount123"
|
||||
Security__DeployApiKey: "docker-dev-deploy-key"
|
||||
Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"
|
||||
Cluster__Roles__0: "driver"
|
||||
GALAXY_MXGW_API_KEY: "${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_GI7-tNozYE6cXGUSgEzL3AHDV7bYcYIHdMwKYgyHdX4}"
|
||||
ports:
|
||||
- "4845:4840"
|
||||
@@ -314,12 +278,8 @@ services:
|
||||
volumes:
|
||||
- ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro
|
||||
depends_on:
|
||||
- admin-a
|
||||
- admin-b
|
||||
- site-a-1
|
||||
- site-a-2
|
||||
- site-b-1
|
||||
- site-b-2
|
||||
- central-1
|
||||
- central-2
|
||||
|
||||
volumes:
|
||||
# SQL Server data dir — persists the OtOpcUa ConfigDb across container recreates.
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
# docker-dev cluster-seed entrypoint. Waits for the OtOpcUa ConfigDb schema to
|
||||
# be in place, then applies the idempotent row seed.
|
||||
# docker-dev cluster-seed entrypoint. Applies the idempotent row seed.
|
||||
#
|
||||
# IMPORTANT: this container does NOT run EF migrations — sqlcmd can't execute
|
||||
# the V2 migration script cleanly because it contains CREATE PROCEDURE
|
||||
# statements inside IF NOT EXISTS BEGIN ... END blocks (procs must be the
|
||||
# first statement in their batch). Migrations are owned by the operator:
|
||||
#
|
||||
# dotnet ef database update \
|
||||
# --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \
|
||||
# --startup-project src/Server/ZB.MOM.WW.OtOpcUa.Host
|
||||
#
|
||||
# (with ConnectionStrings__ConfigDb pointing at Server=localhost,14330;...).
|
||||
# Once the schema is in place, restart the cluster-seed container — or just
|
||||
# `docker compose up -d` and the seed will pick up where it left off thanks to
|
||||
# the IF NOT EXISTS guards in seed-clusters.sql.
|
||||
# This container does NOT run EF migrations — sqlcmd can't execute the migration
|
||||
# script cleanly (it has CREATE PROCEDURE inside IF NOT EXISTS BEGIN ... END
|
||||
# blocks; procs must be the first statement in their batch). The schema is owned
|
||||
# by the `migrator` Compose service (dotnet ef), which this seed depends on via
|
||||
# `service_completed_successfully` — so by the time we run, migrations are fully
|
||||
# applied. The dbo.ServerCluster wait below is therefore just a fast sanity check.
|
||||
# Re-runs are safe: every insert in seed-clusters.sql is IF NOT EXISTS-guarded.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -37,7 +30,7 @@ until run_sql_in master -Q "SELECT 1" >/dev/null 2>&1; do
|
||||
done
|
||||
echo "[cluster-seed] SQL Server up."
|
||||
|
||||
echo "[cluster-seed] waiting for ${DB} database + dbo.ServerCluster table (operator must run dotnet ef database update)..."
|
||||
echo "[cluster-seed] verifying ${DB} schema (dbo.ServerCluster) is present (migrator should have applied it)..."
|
||||
until run_sql_in "$DB" -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do
|
||||
sleep 3
|
||||
done
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
--
|
||||
-- Populates:
|
||||
-- ServerCluster MAIN, SITE-A, SITE-B
|
||||
-- ClusterNode driver-a, driver-b → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
-- ClusterNode central-1, central-2 → MAIN
|
||||
-- site-a-1, site-a-2 → SITE-A
|
||||
-- site-b-1, site-b-2 → SITE-B
|
||||
--
|
||||
-- ServerCluster.NodeCount + RedundancyMode are coupled by CHECK constraint:
|
||||
-- NodeCount=1 ⇒ RedundancyMode='None'
|
||||
@@ -32,7 +32,7 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'MAIN')
|
||||
VALUES
|
||||
('MAIN', 'Main cluster', 'zb', 'docker-dev',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — admin-a/admin-b control-plane, driver-a/driver-b OPC UA publishers.',
|
||||
'docker-dev seed — central-1/central-2 fused admin+driver: UI + deploy singleton + MAIN OPC UA publishers.',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
@@ -41,7 +41,7 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-A')
|
||||
VALUES
|
||||
('SITE-A', 'Site A', 'zb', 'site-a',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev seed — 2-node driver-only, managed by the central cluster over the shared mesh (empty until configured).',
|
||||
'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
@@ -50,11 +50,11 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
VALUES
|
||||
('SITE-B', 'Site B', 'zb', 'site-b',
|
||||
2, 'Warm', 1,
|
||||
'docker-dev seed — 2-node fused admin+driver cluster.',
|
||||
'docker-dev seed — 2-node driver-only, managed by the central cluster over the shared mesh (empty until configured).',
|
||||
'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — main cluster OPC UA publishers
|
||||
-- ClusterNode — central cluster (MAIN UI + deploy singleton + OPC UA publishers)
|
||||
--
|
||||
-- NodeId is "<compose-service>:4053" so it matches what ClusterRoleInfo +
|
||||
-- ConfigPublishCoordinator derive from Akka.Cluster.Get(system).State.Members
|
||||
@@ -62,15 +62,15 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B')
|
||||
-- ClusterNode.NodeId; mismatched values cause FK 547 on deploy.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a:4053')
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'central-1:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-a:4053', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed');
|
||||
VALUES ('central-1:4053', 'MAIN', 'central-1', 4840, 8081, 'urn:OtOpcUa:central-1', 200, 1, 'docker-dev-seed');
|
||||
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b:4053')
|
||||
IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'central-2:4053')
|
||||
INSERT INTO dbo.ClusterNode
|
||||
(NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy)
|
||||
VALUES ('driver-b:4053', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed');
|
||||
VALUES ('central-2:4053', 'MAIN', 'central-2', 4840, 8081, 'urn:OtOpcUa:central-2', 150, 1, 'docker-dev-seed');
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- ClusterNode — site A
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes three
|
||||
# Akka clusters that share the Compose network:
|
||||
# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes the single
|
||||
# central cluster UI on the shared Compose network:
|
||||
#
|
||||
# - Main cluster (default): PathPrefix(`/`) → admin-a / admin-b.
|
||||
# - Site A cluster: Host(`site-a.localhost`) → site-a-1 / site-a-2.
|
||||
# - Site B cluster: Host(`site-b.localhost`) → site-b-1 / site-b-2.
|
||||
# - Central UI (only route): PathPrefix(`/`) → central-1 / central-2.
|
||||
#
|
||||
# Host-header rules are more specific than PathPrefix, so they win over the
|
||||
# default router for the site hostnames automatically — no priority field needed.
|
||||
# The driver-only site nodes serve no UI, so they have no Traefik route — the
|
||||
# central cluster manages and deploys to them over the shared Akka mesh.
|
||||
|
||||
http:
|
||||
routers:
|
||||
@@ -15,16 +13,6 @@ http:
|
||||
rule: "PathPrefix(`/`)"
|
||||
service: otopcua-admin
|
||||
|
||||
otopcua-site-a:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-a.localhost`)"
|
||||
service: otopcua-site-a
|
||||
|
||||
otopcua-site-b:
|
||||
entryPoints: ["web"]
|
||||
rule: "Host(`site-b.localhost`)"
|
||||
service: otopcua-site-b
|
||||
|
||||
services:
|
||||
otopcua-admin:
|
||||
loadBalancer:
|
||||
@@ -37,44 +25,8 @@ http:
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://admin-a:9000"
|
||||
- url: "http://admin-b:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-a:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-a-1:9000"
|
||||
- url: "http://site-a-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
timeout: 2s
|
||||
|
||||
otopcua-site-b:
|
||||
loadBalancer:
|
||||
# Blazor Server uses SignalR; the WebSocket upgrade must hit the same
|
||||
# backend that owns the circuit ID. Sticky cookie keeps each session
|
||||
# pinned to one node so the post-handshake WebSocket doesn't 404.
|
||||
sticky:
|
||||
cookie:
|
||||
name: otopcua_lb
|
||||
httpOnly: true
|
||||
sameSite: lax
|
||||
servers:
|
||||
- url: "http://site-b-1:9000"
|
||||
- url: "http://site-b-2:9000"
|
||||
- url: "http://central-1:9000"
|
||||
- url: "http://central-2:9000"
|
||||
healthCheck:
|
||||
path: /health/active
|
||||
interval: 5s
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
# Per-ClusterId Scoping (hub-and-spoke single mesh) — Design
|
||||
|
||||
**Date:** 2026-06-07
|
||||
**Status:** Approved (brainstorming complete; next step: writing-plans)
|
||||
**Branch:** `feat/per-cluster-scoping`
|
||||
|
||||
## Goal
|
||||
|
||||
Let one **central** cluster's Admin UI manage and deploy to multiple
|
||||
logically-separate clusters that share a single Akka mesh. The central cluster
|
||||
runs 2 fused `admin,driver` nodes (the only UI + the only deploy singleton);
|
||||
each site cluster runs 2 `driver`-only nodes. A single global deploy from the
|
||||
central UI reaches every node, and **each node applies only the slice of the
|
||||
configuration that belongs to its own `ClusterId`** — its drivers and its OPC UA
|
||||
address space. Ship global deploy first; per-cluster deploy is a later follow-up.
|
||||
|
||||
## Why this needs runtime work
|
||||
|
||||
The deploy channel is **in-mesh**: AdminUI → `admin-operations` singleton →
|
||||
`ConfigPublishCoordinator` → DistributedPubSub → driver nodes. DistributedPubSub
|
||||
does not cross Akka mesh boundaries, so for the central UI to deploy to site
|
||||
servers the site nodes **must join the central mesh**. But the runtime currently
|
||||
assumes **one Akka mesh == one logical cluster**:
|
||||
|
||||
- `DriverHostActor.ReconcileDrivers` spawns **every** `DriverInstance` in the
|
||||
artifact with no cluster filter (`DriverHostActor.cs:367`). The `ClusterId` on
|
||||
a spec is used only to *label* health snapshots.
|
||||
- `ConfigPublishCoordinator.DiscoverDriverNodes` broadcasts to **every** driver
|
||||
member of the mesh, no `ClusterId` filter (`ConfigPublishCoordinator.cs:248`).
|
||||
- `ConfigComposer.SnapshotAndFlattenAsync` snapshots **all** clusters' rows into
|
||||
one flat artifact; the address space is built from the whole thing.
|
||||
|
||||
Consequence today: put MAIN + SITE-A + SITE-B nodes in one mesh and every node
|
||||
spawns every cluster's drivers (Galaxy auto-stubs on Linux, so it *would* start)
|
||||
and serves a **merged** address space of all three clusters. That is why the
|
||||
existing docker-dev rig uses three isolated meshes.
|
||||
|
||||
This design adds the missing per-`ClusterId` scoping so a shared mesh behaves as
|
||||
distinct logical clusters.
|
||||
|
||||
## Approach (chosen: A — node-side, parse-time filter, ClusterId from the artifact)
|
||||
|
||||
Each node resolves *its own* `ClusterId` by finding its `NodeId`
|
||||
(`_localNode.Value`, format `"host:port"`, e.g. `central-1:4053`) in the
|
||||
artifact's `ClusterNode` rows, then filters both the driver specs and the
|
||||
address-space composition to that cluster.
|
||||
|
||||
The artifact is a self-contained, consistent snapshot that already includes
|
||||
`ClusterNode` + `DriverInstance` + `Namespace` + `UnsArea` (all carrying
|
||||
`ClusterId`), so resolution needs **no extra DB query** and has no
|
||||
seal-vs-apply inconsistency window. The coordinator stays a **single broadcast**;
|
||||
every node just applies its own slice.
|
||||
|
||||
### Alternatives considered
|
||||
|
||||
- **B — control-plane per-node artifact slices.** `ConfigComposer` emits a
|
||||
filtered artifact per cluster and the coordinator dispatches the right slice to
|
||||
each node. Rejected: turns one broadcast into per-cluster dispatch (a large
|
||||
change to the deploy/ack model), contradicts "ship global first," and still
|
||||
needs the same transitive `ClusterId` resolution.
|
||||
- **C — runtime DB lookup for ClusterId.** Node queries `ClusterNode` by its
|
||||
address at apply time, then filters post-parse. Rejected: extra DB round-trip
|
||||
per node per deploy and a seal-vs-apply inconsistency window; the artifact
|
||||
already contains everything A needs.
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Self-`ClusterId` resolution
|
||||
|
||||
New helper `DeploymentArtifact.ParseClusterScope(blob, nodeId)` returning
|
||||
`(string? ClusterId, int ClusterCount)`:
|
||||
- `ClusterId` = the `ClusterNode` row whose `NodeId == nodeId`, else `null`.
|
||||
- `ClusterCount` = number of `ServerCluster` rows in the artifact.
|
||||
|
||||
Both `DriverHostActor` and `OpcUaPublishActor` call it with `_localNode.Value`.
|
||||
|
||||
**Fallback rule (single source of truth for every filter site):**
|
||||
|
||||
| Condition | Behavior |
|
||||
|---|---|
|
||||
| `ClusterCount ≤ 1` | **Lenient — no filter** (legacy single-cluster meshes + the entire existing test suite behave exactly as today). |
|
||||
| `ClusterCount > 1` and `ClusterId` resolved | **Filter to my cluster.** |
|
||||
| `ClusterCount > 1` and `ClusterId` unresolved | **Apply nothing + log error** (a node in a multi-cluster mesh with no `ClusterNode` row is misconfigured; serving everything would leak other clusters' data). |
|
||||
|
||||
The `ClusterCount ≤ 1` lenient branch is what protects the existing ~210 v2
|
||||
tests and any single-cluster deployment from any behavior change.
|
||||
|
||||
### 2. Driver-spawn filter — `DriverHostActor`
|
||||
|
||||
`DriverInstanceSpec` already carries `ClusterId`, so in `ReconcileDrivers` (and
|
||||
the restart `RestoreServedState` path) apply a one-line predicate over the parsed
|
||||
specs using the fallback rule. In multi-cluster mode, specs with a `null`
|
||||
`ClusterId` are excluded + logged (should never occur — `ConfigComposer` always
|
||||
serializes the column).
|
||||
|
||||
### 3. Address-space filter — `ParseComposition` + `OpcUaPublishActor`
|
||||
|
||||
Add `DeploymentArtifact.ParseComposition(blob, clusterId)`. At parse time the raw
|
||||
artifact entities still carry `ClusterId` / `NamespaceId` / `UnsAreaId` /
|
||||
`DriverInstanceId`, so build in-cluster id sets from the artifact and filter every
|
||||
projection:
|
||||
|
||||
| Projection | Filter predicate |
|
||||
|---|---|
|
||||
| `UnsAreas` | `ClusterId == mine` (direct) |
|
||||
| `UnsLines` | `UnsAreaId ∈ myAreas` |
|
||||
| `EquipmentNodes` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `DriverInstancePlans` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `GalaxyTags` / `EquipmentTags` | `DriverInstanceId ∈ myDrivers` |
|
||||
| `ScriptedAlarmPlans` | `EquipmentId ∈ myEquipment` |
|
||||
|
||||
`OpcUaPublishActor.HandleRebuild` resolves `myClusterId` and calls the filtered
|
||||
parse before `Phase7Planner.Compute`. `_lastApplied` becomes the filtered
|
||||
composition, so the incremental diff stays correct across redeploys. The no-arg
|
||||
`ParseComposition(blob)` is left untouched (legacy / single-cluster path).
|
||||
|
||||
### 4. Deploy ack / convergence
|
||||
|
||||
`ConfigPublishCoordinator` keeps broadcasting to all driver members and waiting
|
||||
for all acks (in the new rig all 6 nodes are driver-role). Each node applies its
|
||||
slice and acks — **including a node whose cluster has an empty slice**. The one
|
||||
risk: the ack must fire even when the node's plan is empty. Implementation will
|
||||
**verify the ack is unconditional** and add a small fix if it is currently gated
|
||||
on a non-empty change set. No change to `DiscoverDriverNodes`.
|
||||
|
||||
### 5. docker-dev compose + seed rewrite
|
||||
|
||||
- **compose:** remove `admin-a` / `admin-b` / `driver-a` / `driver-b`; add
|
||||
`central-1` / `central-2` (`OTOPCUA_ROLES=admin,driver`, seed = `central-1`,
|
||||
OPC UA `4840` / `4841`, ASPNETCORE UI on `:9000`). `site-a-1/2`, `site-b-1/2`
|
||||
become `driver`-only (`OTOPCUA_ROLES=driver`, `Cluster__Roles__0=driver`, seed
|
||||
→ `central-1`, OPC UA `4842`–`4845`), dropping their UI / Jwt / Ldap /
|
||||
DeployApiKey env + Traefik exposure. All nodes share the one ConfigDb.
|
||||
- **traefik:** single `PathPrefix(/)` router → `central-1` / `central-2`
|
||||
(sticky cookie); drop the two site routers + services in both
|
||||
`docker-compose.yml` and `traefik-dynamic.yml`.
|
||||
- **seed SQL (`seed/seed-clusters.sql`):** MAIN `ClusterNode` rows become
|
||||
`central-1:4053` / `central-2:4053` (replacing `driver-a` / `driver-b`);
|
||||
SITE-A / SITE-B keep their `ServerCluster` + 2 `ClusterNode` rows but **no
|
||||
drivers/tags** (empty sites). Update the `Notes` columns + the file header
|
||||
comments. The Galaxy namespace / driver / tags stay on MAIN (they run on the
|
||||
central fused nodes).
|
||||
- **compose header + comment blocks:** rewrite the topology description (single
|
||||
mesh, hub-and-spoke, central-only UI).
|
||||
|
||||
## Data flow (after the change)
|
||||
|
||||
1. Operator clicks **Deploy** in the central UI (or `POST /api/deployments`).
|
||||
2. `admin-operations` singleton (on a central node) → `ConfigComposer` snapshots
|
||||
**all** clusters' rows into one artifact → `ConfigPublishCoordinator`
|
||||
broadcasts `DispatchDeployment` to **all** driver members.
|
||||
3. Each node resolves its own `ClusterId` from the artifact's `ClusterNode` rows.
|
||||
4. `DriverHostActor` spawns only its cluster's `DriverInstance`s.
|
||||
5. `OpcUaPublishActor` materialises only its cluster's address space.
|
||||
6. Every node acks; the coordinator seals the deployment when all acks arrive.
|
||||
7. Result: central `:4840`/`:4841` serve MAIN's Galaxy tree; site
|
||||
`:4842`–`:4845` serve only their own (empty until configured) trees.
|
||||
|
||||
## Error handling
|
||||
|
||||
- **Misconfigured node** (multi-cluster mesh, no matching `ClusterNode` row):
|
||||
applies nothing, logs an error, still acks (so the deploy converges rather than
|
||||
hanging). Surfaced for the operator to add the missing `ClusterNode` row.
|
||||
- **Pre-PR / single-cluster artifacts:** `ClusterCount ≤ 1` → lenient no-filter,
|
||||
identical to current behavior.
|
||||
- **Empty cluster slice:** node applies an empty plan and acks normally.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit:** `ParseClusterScope` (match / miss / count); `ParseComposition(blob,
|
||||
clusterId)` (cross-cluster projections excluded; transitive resolution for
|
||||
UnsLine / Equipment / Tag / ScriptedAlarm); the driver-spec filter predicate
|
||||
(lenient / strict / unresolved-strict).
|
||||
- **Integration:** a 2-cluster scoping test on the in-process harness — two
|
||||
driver nodes assigned to different `ClusterId`s, one deploy, assert each spawns
|
||||
only its cluster's drivers and materialises only its cluster's tree.
|
||||
- **Backward-compat:** the existing single-cluster suites must stay green (the
|
||||
`ClusterCount ≤ 1` lenient branch guarantees this).
|
||||
- **Live (docker-dev rig):** bring the rig up, sign into the central UI, confirm
|
||||
3 clusters listed, deploy, confirm `:4840` shows the Galaxy tree and
|
||||
`:4842`/`:4844` are empty (not the merged tree).
|
||||
|
||||
## Classification
|
||||
|
||||
High-risk — touches the actor model, the Phase7 data contract, and the deploy
|
||||
path. The implementation plan will be TDD'd section by section.
|
||||
|
||||
## Out of scope (follow-ups)
|
||||
|
||||
- **Per-cluster deploy** (deploy just SITE-A from the UI) — global deploy ships
|
||||
first; per-cluster targeting is a later coordinator + UI enhancement.
|
||||
- **Seeding demo drivers on the sites** — sites start empty; drivers are added
|
||||
via the central UI.
|
||||
@@ -0,0 +1,825 @@
|
||||
# Per-ClusterId Scoping (hub-and-spoke single mesh) Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Let one central cluster's Admin UI deploy to multiple logically-separate
|
||||
clusters that share one Akka mesh, with each node applying only its own
|
||||
`ClusterId`'s drivers + OPC UA address space.
|
||||
|
||||
**Architecture:** Approach A — node-side, parse-time filtering. Each node resolves
|
||||
its own `ClusterId` from the deployment artifact's `ClusterNode` rows (no extra DB
|
||||
query) and filters both the driver specs and the address-space composition to that
|
||||
cluster. The coordinator stays a single broadcast; every node applies its own
|
||||
slice and acks. A single-cluster artifact filters to nothing-different, so existing
|
||||
deployments + tests are unaffected.
|
||||
|
||||
**Tech Stack:** .NET 10, Akka.NET, EF Core, `System.Text.Json` (artifact parse),
|
||||
xUnit v2 + Shouldly (Runtime.Tests uses Akka.TestKit.Xunit2), Docker Compose + Traefik.
|
||||
|
||||
**Design doc:** `docs/plans/2026-06-07-per-cluster-scoping-design.md` (approved).
|
||||
|
||||
**The fallback rule (single source of truth — implemented once in `ResolveClusterScope`):**
|
||||
- artifact has **≤1 cluster** → `None` (apply everything; legacy/single-cluster + all existing tests behave identically).
|
||||
- artifact has **>1 cluster** and the node's `ClusterNode` row is found → `ScopeTo(clusterId)`.
|
||||
- artifact has **>1 cluster** and the node's row is **not** found → `Suppress` (apply nothing + the caller logs).
|
||||
|
||||
**Hard rules (carry through every task):** never `git add .` — stage by explicit
|
||||
path; never stage `sql_login.txt` or `src/Server/.../pki/`; never echo the gateway
|
||||
API key into a *new* tracked file; never force-push or skip hooks.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: `ResolveClusterScope` + node-scoped `ParseDriverInstances`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 6, Task 7, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs`
|
||||
|
||||
**Context:** `DeploymentArtifact` is a static JSON decoder over the artifact blob
|
||||
produced by `ConfigComposer.SnapshotAndFlattenAsync`. The artifact root has
|
||||
Pascal-case arrays: `Clusters` (ServerCluster, has `ClusterId`), `Nodes`
|
||||
(ClusterNode, has `NodeId` + `ClusterId`), `DriverInstances` (has `ClusterId`),
|
||||
`Namespaces`/`UnsAreas` (have `ClusterId`), `Equipment`/`Tags`/`UnsLines`/`ScriptedAlarms`
|
||||
(no `ClusterId` — traced via `DriverInstanceId`/`UnsAreaId`/`EquipmentId`).
|
||||
`DriverInstanceSpec` already carries `ClusterId` (`DeploymentArtifact.cs:19`).
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add to `DeploymentArtifactTests.cs`. Reuse the file's existing artifact-blob
|
||||
helper if present; otherwise add this minimal builder:
|
||||
|
||||
```csharp
|
||||
private static byte[] BlobOf(object snapshot) =>
|
||||
System.Text.Json.JsonSerializer.SerializeToUtf8Bytes(snapshot);
|
||||
|
||||
private static object MultiClusterSnapshot() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "main-galaxy", Name = "g", DriverType = "GalaxyMxGateway", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "sa-modbus", Name = "m", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
```csharp
|
||||
[Fact]
|
||||
public void ResolveClusterScope_single_cluster_artifact_returns_None()
|
||||
{
|
||||
var blob = BlobOf(new { Clusters = new[] { new { ClusterId = "MAIN" } }, Nodes = Array.Empty<object>() });
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(blob, "central-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.None);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_known_node_scopes_to_its_cluster()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "site-a-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.ScopeTo);
|
||||
scope.ClusterId.ShouldBe("SITE-A");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_unknown_node_suppresses()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.Suppress);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_returns_only_my_clusters_drivers()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "central-1:4053");
|
||||
specs.Select(s => s.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_unknown_node_returns_empty()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
specs.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_single_cluster_returns_all()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "d1", Name = "d", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN" } },
|
||||
});
|
||||
DeploymentArtifact.ParseDriverInstances(blob, "anything:4053").Select(s => s.DriverInstanceId).ShouldBe(new[] { "d1" });
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run the tests — verify they fail**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
Expected: FAIL — `ClusterFilterMode` / `ResolveClusterScope` / the 2-arg `ParseDriverInstances` don't exist.
|
||||
|
||||
**Step 3: Implement**
|
||||
|
||||
In `DeploymentArtifact.cs`, add the scope types just above `public static class DeploymentArtifact` (top-level, same namespace):
|
||||
|
||||
```csharp
|
||||
/// <summary>How a node should scope a deployment artifact to its own ClusterId.</summary>
|
||||
public enum ClusterFilterMode { None, ScopeTo, Suppress }
|
||||
|
||||
/// <summary>Resolved scoping decision for a node against an artifact.</summary>
|
||||
/// <param name="Mode">None = apply everything (single-cluster / legacy); ScopeTo = filter to <paramref name="ClusterId"/>; Suppress = apply nothing.</param>
|
||||
/// <param name="ClusterId">The node's ClusterId when <paramref name="Mode"/> is ScopeTo; otherwise null.</param>
|
||||
public readonly record struct ClusterScope(ClusterFilterMode Mode, string? ClusterId);
|
||||
```
|
||||
|
||||
Inside the class, add `ResolveClusterScope` and the 2-arg `ParseDriverInstances`
|
||||
overload (place after the existing `ParseDriverInstances`):
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Resolve how a node should scope a multi-cluster deployment artifact to its own logical
|
||||
/// cluster, from the same consistent snapshot it applies (the artifact's ClusterNode rows map
|
||||
/// NodeId → ClusterId; the ServerCluster count decides single- vs multi-cluster). Fallback rule:
|
||||
/// ≤1 cluster ⇒ no filter (legacy single-cluster meshes + existing tests unchanged); >1 cluster
|
||||
/// with the node's row found ⇒ scope to that ClusterId; >1 cluster with the row missing ⇒
|
||||
/// suppress (apply nothing) — a node in a multi-cluster mesh with no ClusterNode row is
|
||||
/// misconfigured and must not serve other clusters' data.
|
||||
/// </summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form (matches ClusterNode.NodeId).</param>
|
||||
/// <returns>The scoping decision for this node.</returns>
|
||||
public static ClusterScope ResolveClusterScope(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
if (blob.IsEmpty) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
var clusterCount = root.TryGetProperty("Clusters", out var cl) && cl.ValueKind == JsonValueKind.Array
|
||||
? cl.GetArrayLength() : 0;
|
||||
if (clusterCount <= 1) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
|
||||
string? myCluster = null;
|
||||
if (root.TryGetProperty("Nodes", out var nodes) && nodes.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in nodes.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var nid = el.TryGetProperty("NodeId", out var nEl) ? nEl.GetString() : null;
|
||||
if (!string.Equals(nid, nodeId, StringComparison.Ordinal)) continue;
|
||||
myCluster = el.TryGetProperty("ClusterId", out var cEl) ? cEl.GetString() : null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return string.IsNullOrWhiteSpace(myCluster)
|
||||
? new ClusterScope(ClusterFilterMode.Suppress, null)
|
||||
: new ClusterScope(ClusterFilterMode.ScopeTo, myCluster);
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
return new ClusterScope(ClusterFilterMode.None, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Cluster-scoped overload: the driver specs a node should host given its NodeId.</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form.</param>
|
||||
/// <returns>The filtered driver specs per the node's <see cref="ResolveClusterScope"/> decision.</returns>
|
||||
public static IReadOnlyList<DriverInstanceSpec> ParseDriverInstances(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
var all = ParseDriverInstances(blob);
|
||||
return scope.Mode switch
|
||||
{
|
||||
ClusterFilterMode.Suppress => Array.Empty<DriverInstanceSpec>(),
|
||||
ClusterFilterMode.ScopeTo => all.Where(
|
||||
s => string.Equals(s.ClusterId, scope.ClusterId, StringComparison.Ordinal)).ToArray(),
|
||||
_ => all,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Run the tests — verify they pass**
|
||||
|
||||
Run: `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
Expected: PASS (new + all pre-existing DeploymentArtifact tests).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs
|
||||
git commit -m "feat(runtime): ClusterId scope resolution + node-scoped driver-spec parse"
|
||||
```
|
||||
|
||||
**Acceptance:** `ResolveClusterScope` implements the 3-branch rule; the scoped
|
||||
`ParseDriverInstances` filters per the rule; the no-arg overload is untouched.
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Node-scoped `ParseComposition` (address-space filter)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 6, Task 7, Task 8
|
||||
|
||||
**Blocked by:** Task 1 (uses `ClusterScope` / `ResolveClusterScope`, same file).
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs`
|
||||
|
||||
**Context:** `ParseComposition(blob)` returns a `Phase7CompositionResult` whose
|
||||
projections carry no `ClusterId`. Filter by building in-cluster id sets from the
|
||||
raw artifact: `DriverInstanceId`s and `UnsAreaId`s whose row's `ClusterId` matches,
|
||||
plus `EquipmentId`s whose `DriverInstanceId` is in-cluster. Then filter each
|
||||
projection (areas by `UnsAreaId`, lines by `UnsAreaId`, equipment by `EquipmentId`,
|
||||
drivers/galaxyTags/equipmentTags by `DriverInstanceId`, alarms by `EquipmentId`).
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Extend `MultiClusterSnapshot()` from Task 1 with namespaces + tags so galaxy-tag
|
||||
filtering is exercised, then add the test:
|
||||
|
||||
```csharp
|
||||
private static object MultiClusterSnapshotWithTags() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceId = "main-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceId = "sa-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
Namespaces = new[]
|
||||
{
|
||||
new { NamespaceId = "main-ns", ClusterId = "MAIN", Kind = 1 },
|
||||
new { NamespaceId = "sa-ns", ClusterId = "SITE-A", Kind = 1 },
|
||||
},
|
||||
Tags = new[]
|
||||
{
|
||||
new { TagId = "t-main", DriverInstanceId = "main-galaxy", EquipmentId = (string?)null, Name = "M1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
new { TagId = "t-sa", DriverInstanceId = "sa-galaxy", EquipmentId = (string?)null, Name = "S1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
},
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_keeps_only_my_clusters_drivers_and_tags()
|
||||
{
|
||||
var blob = BlobOf(MultiClusterSnapshotWithTags());
|
||||
|
||||
var main = DeploymentArtifact.ParseComposition(blob, "central-1:4053");
|
||||
main.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
main.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-main" });
|
||||
|
||||
var siteA = DeploymentArtifact.ParseComposition(blob, "site-a-1:4053");
|
||||
siteA.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "sa-galaxy" });
|
||||
siteA.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-sa" });
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_unknown_node_is_empty()
|
||||
{
|
||||
var comp = DeploymentArtifact.ParseComposition(BlobOf(MultiClusterSnapshotWithTags()), "ghost-9:4053");
|
||||
comp.GalaxyTags.ShouldBeEmpty();
|
||||
comp.DriverInstancePlans.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_single_cluster_node_id_overload_matches_legacy()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceId = "d1", DriverType = "Modbus", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "ns" } },
|
||||
});
|
||||
DeploymentArtifact.ParseComposition(blob, "anything:4053").DriverInstancePlans.Count
|
||||
.ShouldBe(DeploymentArtifact.ParseComposition(blob).DriverInstancePlans.Count);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (2-arg `ParseComposition` missing):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
|
||||
**Step 3: Implement**
|
||||
|
||||
Add to `DeploymentArtifact.cs` (after the no-arg `ParseComposition`):
|
||||
|
||||
```csharp
|
||||
/// <summary>Cluster-scoped overload: the address-space composition a node should materialise given
|
||||
/// its NodeId. Filters every projection to the node's own ClusterId (see <see cref="ResolveClusterScope"/>).</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form.</param>
|
||||
/// <returns>The filtered composition per the node's scoping decision.</returns>
|
||||
public static Phase7CompositionResult ParseComposition(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
if (scope.Mode == ClusterFilterMode.None) return ParseComposition(blob);
|
||||
if (scope.Mode == ClusterFilterMode.Suppress) return Empty();
|
||||
|
||||
var full = ParseComposition(blob);
|
||||
var sets = BuildClusterSets(blob, scope.ClusterId!);
|
||||
return new Phase7CompositionResult(
|
||||
full.UnsAreas.Where(a => sets.AreaIds.Contains(a.UnsAreaId)).ToArray(),
|
||||
full.UnsLines.Where(l => sets.AreaIds.Contains(l.UnsAreaId)).ToArray(),
|
||||
full.EquipmentNodes.Where(e => sets.EquipmentIds.Contains(e.EquipmentId)).ToArray(),
|
||||
full.DriverInstancePlans.Where(d => sets.DriverIds.Contains(d.DriverInstanceId)).ToArray(),
|
||||
full.ScriptedAlarmPlans.Where(a => sets.EquipmentIds.Contains(a.EquipmentId)).ToArray(),
|
||||
full.GalaxyTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray())
|
||||
{
|
||||
EquipmentTags = full.EquipmentTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray(),
|
||||
};
|
||||
}
|
||||
|
||||
private sealed record ClusterSets(HashSet<string> DriverIds, HashSet<string> AreaIds, HashSet<string> EquipmentIds);
|
||||
|
||||
/// <summary>Build the in-cluster id sets used to filter a composition: DriverInstanceIds + UnsAreaIds
|
||||
/// that directly carry the ClusterId, plus EquipmentIds whose DriverInstanceId is in-cluster.</summary>
|
||||
private static ClusterSets BuildClusterSets(ReadOnlySpan<byte> blob, string clusterId)
|
||||
{
|
||||
var driverIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var areaIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var equipmentIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
CollectIdsWhereCluster(root, "DriverInstances", "DriverInstanceId", clusterId, driverIds);
|
||||
CollectIdsWhereCluster(root, "UnsAreas", "UnsAreaId", clusterId, areaIds);
|
||||
// Equipment carries no ClusterId — include it when its DriverInstanceId is in-cluster.
|
||||
if (root.TryGetProperty("Equipment", out var eq) && eq.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in eq.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var di = el.TryGetProperty("DriverInstanceId", out var diEl) ? diEl.GetString() : null;
|
||||
var id = el.TryGetProperty("EquipmentId", out var idEl) ? idEl.GetString() : null;
|
||||
if (!string.IsNullOrWhiteSpace(id) && di is not null && driverIds.Contains(di))
|
||||
equipmentIds.Add(id!);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (JsonException) { /* empty sets ⇒ nothing matches ⇒ empty composition */ }
|
||||
return new ClusterSets(driverIds, areaIds, equipmentIds);
|
||||
}
|
||||
|
||||
private static void CollectIdsWhereCluster(
|
||||
JsonElement root, string arrayName, string idField, string clusterId, HashSet<string> into)
|
||||
{
|
||||
if (!root.TryGetProperty(arrayName, out var arr) || arr.ValueKind != JsonValueKind.Array) return;
|
||||
foreach (var el in arr.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var cid = el.TryGetProperty("ClusterId", out var cEl) ? cEl.GetString() : null;
|
||||
if (!string.Equals(cid, clusterId, StringComparison.Ordinal)) continue;
|
||||
var id = el.TryGetProperty(idField, out var idEl) ? idEl.GetString() : null;
|
||||
if (!string.IsNullOrWhiteSpace(id)) into.Add(id!);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Note: equipment is filtered via its `DriverInstanceId` (schema-guaranteed present
|
||||
for equipment-namespace rows). If a future schema allows equipment with a null
|
||||
`DriverInstanceId`, extend `BuildClusterSets` to also include equipment whose
|
||||
`UnsLineId` maps to an in-cluster `UnsArea` — out of scope here (the dev rig's
|
||||
sites are empty).
|
||||
|
||||
**Step 4: Run — verify PASS** (new + pre-existing tests):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DeploymentArtifactTests"`
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DeploymentArtifactTests.cs
|
||||
git commit -m "feat(runtime): node-scoped ParseComposition filters address space by ClusterId"
|
||||
```
|
||||
|
||||
**Acceptance:** scoped `ParseComposition` excludes cross-cluster projections;
|
||||
single-cluster + unknown-node behavior matches the rule; no-arg overload untouched.
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Wire driver-spawn + SubscribeBulk filtering into `DriverHostActor`
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 4
|
||||
|
||||
**Blocked by:** Task 1, Task 2.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs:367` (ReconcileDrivers) and `:432` (PushDesiredSubscriptions)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/` (add `DriverHostActorClusterScopeTests.cs`, or extend an existing DriverHostActor test if present)
|
||||
|
||||
**Context:** `ReconcileDrivers` (line 349) loads the artifact blob then calls
|
||||
`ParseDriverInstances(blob)`. `PushDesiredSubscriptions` (line 412) calls
|
||||
`ParseComposition(blob)`. Both run for normal applies (`ApplyAndAck`, line 311/321)
|
||||
**and** restart restore (`RestoreApplied`, line 393/395) — so changing these two
|
||||
call sites covers both paths. The ack (`SendAck`, line 314) fires unconditionally
|
||||
*before* the rebuild, so an empty/suppressed slice still acks — no ack change needed.
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
A TestKit test that a driver node in a multi-cluster artifact spawns only its
|
||||
cluster's drivers, and a node whose cluster has no drivers still reaches Applied
|
||||
(acks). Model it on the existing DriverHostActor tests in the same folder (reuse
|
||||
their in-memory DbContext + DispatchDeployment plumbing — inspect a sibling test
|
||||
for the exact harness helpers). Core assertions:
|
||||
|
||||
```csharp
|
||||
// Given a sealed deployment whose artifact has 2 clusters (MAIN: 1 driver, SITE-A: 1 driver)
|
||||
// and a DriverHostActor whose _localNode = "site-a-1:4053":
|
||||
// - after DispatchDeployment, GetDiagnostics shows ONLY the SITE-A driver (not MAIN's)
|
||||
// - the node sends an Applied ApplyAck (convergence holds even though it ignored MAIN's driver)
|
||||
// And a second actor with _localNode = "central-1:4053" shows ONLY the MAIN driver.
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (node currently spawns both clusters' drivers):
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests --filter "FullyQualifiedName~DriverHostActorClusterScope"`
|
||||
|
||||
**Step 3: Implement** — two one-line call-site changes:
|
||||
|
||||
`DriverHostActor.cs:367`:
|
||||
```csharp
|
||||
// before:
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob);
|
||||
// after:
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob, _localNode.Value);
|
||||
```
|
||||
|
||||
`DriverHostActor.cs:432`:
|
||||
```csharp
|
||||
// before:
|
||||
composition = DeploymentArtifact.ParseComposition(blob);
|
||||
// after:
|
||||
composition = DeploymentArtifact.ParseComposition(blob, _localNode.Value);
|
||||
```
|
||||
|
||||
**Step 4: Run — verify PASS, then the whole Runtime suite for no regression:**
|
||||
```bash
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
```
|
||||
Expected: new test green; all pre-existing tests green (single-cluster harnesses
|
||||
hit the `None` branch → unchanged).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorClusterScopeTests.cs
|
||||
git commit -m "feat(runtime): DriverHost spawns + subscribes only its own ClusterId's drivers"
|
||||
```
|
||||
|
||||
**Acceptance:** a site node spawns only its cluster's drivers and still acks
|
||||
Applied with an empty slice; existing single-cluster tests stay green.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Wire scoped composition into `OpcUaPublishActor.HandleRebuild`
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 3
|
||||
|
||||
**Blocked by:** Task 2.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs:212` (HandleRebuild)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorRebuildTests.cs`
|
||||
|
||||
**Context:** `HandleRebuild` (line ~210) loads the artifact then calls
|
||||
`ParseComposition(artifact)` and materialises via `Phase7Applier`. `_localNode` is
|
||||
`NodeId?` (line 46) — null on legacy/dev callers, so guard for null. The existing
|
||||
`OpcUaPublishActorRebuildTests` use a fake/inspectable sink — reuse that pattern.
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```csharp
|
||||
// Build a 2-cluster artifact (MAIN galaxy tag t-main; SITE-A galaxy tag t-sa),
|
||||
// seal it as a Deployment row in the test DbContext, construct an OpcUaPublishActor
|
||||
// with _localNode = NodeId.Parse("site-a-1:4053") and an inspectable sink, send
|
||||
// RebuildAddressSpace(correlation, depId), then assert the sink received ONLY the
|
||||
// SITE-A variable/folders (t-sa) and NOT the MAIN ones (t-main).
|
||||
// Mirror with _localNode = "central-1:4053" → only MAIN.
|
||||
```
|
||||
|
||||
**Step 2: Run — verify FAIL** (sink currently gets both clusters' nodes).
|
||||
|
||||
**Step 3: Implement** — `OpcUaPublishActor.cs:212`:
|
||||
```csharp
|
||||
// before:
|
||||
var composition = DeploymentArtifact.ParseComposition(artifact);
|
||||
// after: scope to this node's ClusterId when we know our identity; legacy/dev callers (null
|
||||
// _localNode) keep the unscoped behaviour.
|
||||
var composition = _localNode is { } ln
|
||||
? DeploymentArtifact.ParseComposition(artifact, ln.Value)
|
||||
: DeploymentArtifact.ParseComposition(artifact);
|
||||
```
|
||||
|
||||
**Step 4: Run — verify PASS + full Runtime suite:**
|
||||
```bash
|
||||
dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests
|
||||
```
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorRebuildTests.cs
|
||||
git commit -m "feat(runtime): OPC UA rebuild materialises only the node's ClusterId slice"
|
||||
```
|
||||
|
||||
**Acceptance:** a node materialises only its own cluster's address space; null
|
||||
`_localNode` keeps legacy behavior; existing rebuild tests stay green.
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Multi-cluster scoping E2E on the cluster harness
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Blocked by:** Task 3, Task 4.
|
||||
|
||||
**Files:**
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/` (add `MultiClusterScopingTests.cs`)
|
||||
- Possibly Modify: `tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/TwoNodeClusterHarness.cs` (only if a 2-ClusterId seed helper is needed)
|
||||
|
||||
**Context:** `TwoNodeClusterHarness` boots an in-process 2-node cluster on an
|
||||
in-memory DB with a null OPC UA sink. It proves the *deploy path* end-to-end
|
||||
(compose → broadcast → apply → ack) but cannot assert a materialised tree (null
|
||||
sink). So this test asserts **driver** scoping through the real path: seed two
|
||||
`ServerCluster` rows + two `ClusterNode` rows (one per node, different `ClusterId`)
|
||||
+ one `DriverInstance` per cluster, run one deployment, and assert via each node's
|
||||
`GetDiagnostics` that each node hosts only its own cluster's driver.
|
||||
|
||||
If the harness's seed helpers can't express two clusters cleanly, that's a plan
|
||||
defect — surface it; the unit + actor tests (Tasks 1–4) already cover the scoping
|
||||
logic, and Task 9 covers the live proof.
|
||||
|
||||
**Step 1: Write the failing test** (per the context above).
|
||||
**Step 2: Run — verify FAIL.**
|
||||
**Step 3:** No production code — this test passes once Tasks 3+4 are in. If it
|
||||
fails, the defect is in 3/4; fix there, not here.
|
||||
**Step 4: Run — verify PASS:**
|
||||
`dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests --filter "FullyQualifiedName~MultiClusterScoping"`
|
||||
**Step 5: Commit**
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests/MultiClusterScopingTests.cs
|
||||
# add TwoNodeClusterHarness.cs ONLY if you modified it
|
||||
git commit -m "test(integration): multi-cluster deploy scopes drivers per node"
|
||||
```
|
||||
|
||||
**Acceptance:** one deploy over a 2-cluster mesh leaves each node hosting only its
|
||||
own cluster's driver.
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Rewrite `docker-dev/docker-compose.yml` for the single mesh
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 7, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/docker-compose.yml`
|
||||
|
||||
**Context:** Today MAIN is 4 nodes (`admin-a`/`admin-b` admin + `driver-a`/`driver-b`
|
||||
driver) as one mesh; SITE-A/SITE-B are 2-node fused meshes with their own seeds.
|
||||
The anchor `&otopcua-host` (currently on `admin-a`) holds the shared build + env.
|
||||
|
||||
**Steps:**
|
||||
|
||||
1. Replace the four MAIN services with **two fused nodes**:
|
||||
- `central-1` (becomes the `&otopcua-host` anchor): `OTOPCUA_ROLES: "admin,driver"`,
|
||||
`ASPNETCORE_URLS: "http://+:9000"`, `Cluster__PublicHostname: "central-1"`,
|
||||
`Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"`, `Cluster__Roles__0: "admin"`,
|
||||
`Cluster__Roles__1: "driver"`, keep all `Security__*` (Jwt/Ldap/DeployApiKey) + `GALAXY_MXGW_API_KEY`
|
||||
(keep the existing `${GALAXY_MXGW_API_KEY:-mxgw_otopcua2_...}` default — do **not** introduce a new
|
||||
hardcoded key), `ports: ["4840:4840"]`.
|
||||
- `central-2`: same env, `Cluster__PublicHostname: "central-2"`, seed → `central-1`,
|
||||
`ports: ["4841:4840"]`, `depends_on: { sql: healthy, central-1: started }`.
|
||||
2. Convert the four site services to **driver-only**, all seeding `central-1`:
|
||||
- For each of `site-a-1`, `site-a-2`, `site-b-1`, `site-b-2`: `OTOPCUA_ROLES: "driver"`,
|
||||
`Cluster__Roles__0: "driver"` (remove the `admin` role + `Cluster__Roles__1`), keep
|
||||
`Cluster__PublicHostname` = own name, set `Cluster__SeedNodes__0: "akka.tcp://otopcua@central-1:4053"`,
|
||||
**remove** `ASPNETCORE_URLS` + the `Security__Jwt__*` / `Security__Ldap__*` / `Security__DeployApiKey`
|
||||
block (driver-only nodes serve no UI and authenticate no users), keep the `ConnectionStrings__ConfigDb`
|
||||
+ `GALAXY_MXGW_API_KEY` lines, keep their OPC UA ports (`4842`–`4845`), and add
|
||||
`depends_on: { sql: healthy, central-1: started }`.
|
||||
3. Update `traefik.depends_on` to `[central-1, central-2]` (drop the removed services).
|
||||
4. Rewrite the header comment block (lines ~1–40) to describe the **single mesh,
|
||||
hub-and-spoke** topology: one Akka mesh seeded by `central-1`; `central-1/2`
|
||||
are `admin,driver` (the only UI + deploy singleton); `site-*` are `driver`-only
|
||||
members scoped by `ClusterId`; central UI at `:9200` manages + deploys to all.
|
||||
Keep the existing accurate notes (SQL persistence, mesh isolation note now
|
||||
describes a single mesh, headless deploy via `:9200/api/deployments`).
|
||||
|
||||
**Verify:**
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml config --quiet && echo "compose OK"
|
||||
```
|
||||
Expected: `compose OK`.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/docker-compose.yml
|
||||
git commit -m "feat(docker-dev): single-mesh hub-and-spoke (central-1/2 + driver-only sites)"
|
||||
```
|
||||
|
||||
**Acceptance:** `docker compose config` parses; central nodes fused with UI; site
|
||||
nodes driver-only seeding central; no new hardcoded API key.
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Rewrite `docker-dev/traefik-dynamic.yml` (central-only route)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 6, Task 8
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/traefik-dynamic.yml`
|
||||
|
||||
**Steps:**
|
||||
1. Keep the `otopcua-admin` router (`PathPrefix(`/`)`) + service, but point its
|
||||
`loadBalancer.servers` at `http://central-1:9000` and `http://central-2:9000`.
|
||||
2. Delete the `otopcua-site-a` and `otopcua-site-b` routers **and** their services
|
||||
(driver-only sites serve no UI). Keep the sticky-cookie + `/health/active`
|
||||
healthcheck on the surviving `otopcua-admin` service.
|
||||
3. Update the file header comment to describe the single central UI route.
|
||||
|
||||
**Verify:** covered by Task 6's `docker compose config` (Traefik file is mounted,
|
||||
not parsed by compose) — sanity-check it's valid YAML by eye; the live bring-up in
|
||||
Task 9 is the real check.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/traefik-dynamic.yml
|
||||
git commit -m "feat(docker-dev): Traefik routes only the central cluster UI"
|
||||
```
|
||||
|
||||
**Acceptance:** one router → central-1/central-2; site routers/services removed.
|
||||
|
||||
---
|
||||
|
||||
## Task 8: Rewrite `docker-dev/seed/seed-clusters.sql` (MAIN nodes → central-1/2)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 1, Task 2, Task 6, Task 7
|
||||
|
||||
**Files:**
|
||||
- Modify: `docker-dev/seed/seed-clusters.sql`
|
||||
|
||||
**Context:** The seed inserts 3 `ServerCluster` rows + 6 `ClusterNode` rows. MAIN's
|
||||
`ClusterNode` rows are currently `driver-a:4053` / `driver-b:4053`. Sites already
|
||||
have **no** drivers/tags (only `ServerCluster` + `ClusterNode`), which matches the
|
||||
"empty sites" decision — leave them empty.
|
||||
|
||||
**Steps:**
|
||||
1. Change MAIN's two `ClusterNode` inserts from `driver-a` / `driver-b` to
|
||||
`central-1` / `central-2`: `NodeId` `central-1:4053` / `central-2:4053`,
|
||||
`Host` `central-1` / `central-2`, `ApplicationUri` `urn:OtOpcUa:central-1` /
|
||||
`urn:OtOpcUa:central-2`, keep `OpcUaPort 4840`, `ServiceLevelBase` 200/150.
|
||||
Update the `IF NOT EXISTS ... WHERE NodeId = '...'` guards to the new ids.
|
||||
2. Update the MAIN `ServerCluster` `Notes` to "central-1/central-2 fused
|
||||
admin+driver — UI + deploy singleton + MAIN OPC UA publishers."
|
||||
3. Update the SITE-A/SITE-B `ServerCluster` `Notes` to "2-node driver-only,
|
||||
managed by the central cluster over the shared mesh (empty until configured)."
|
||||
4. Update the file header comment block (the `ClusterNode` map at lines ~5–7) to
|
||||
`central-1, central-2 → MAIN`.
|
||||
5. Leave the Galaxy namespace/driver/tags (MAIN) and the LDAP→role mappings
|
||||
unchanged.
|
||||
|
||||
**Verify:** SQL isn't run locally on macOS (no SQL reachable); correctness is
|
||||
confirmed by the live bring-up in Task 9 (the `cluster-seed` job runs it). Eyeball
|
||||
that every changed `NodeId`/guard is consistent.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add docker-dev/seed/seed-clusters.sql
|
||||
git commit -m "feat(docker-dev): seed MAIN ClusterNodes as central-1/central-2"
|
||||
```
|
||||
|
||||
**Acceptance:** MAIN `ClusterNode` rows are `central-1`/`central-2`; sites keep
|
||||
their cluster + node rows with no drivers; notes/header updated.
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Live docker-dev verification
|
||||
|
||||
**Classification:** standard (verification — no subagent review needed)
|
||||
**Estimated implement time:** ~5 min (plus container build time)
|
||||
**Parallelizable with:** none
|
||||
|
||||
**Blocked by:** Task 3, Task 4, Task 5, Task 6, Task 7, Task 8.
|
||||
|
||||
**Files:** none (operational).
|
||||
|
||||
**Steps (run from repo root on this Mac; Docker is local):**
|
||||
1. Sync deployment + rebuild the image and bring the rig up:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml down
|
||||
docker compose -f docker-dev/docker-compose.yml up -d --build
|
||||
```
|
||||
2. Confirm the mesh formed and the seed ran:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml ps
|
||||
docker compose -f docker-dev/docker-compose.yml logs cluster-seed --tail=40
|
||||
```
|
||||
Expect 3 `ServerCluster` rows + 6 `ClusterNode` rows (`central-1/2`, `site-a-1/2`, `site-b-1/2`).
|
||||
3. Trigger a global deploy headlessly (no UI login needed — the deploy API is on
|
||||
the central admin nodes):
|
||||
```bash
|
||||
curl -s -X POST http://localhost:9200/api/deployments \
|
||||
-H "X-Api-Key: docker-dev-deploy-key" -H "Content-Type: application/json" \
|
||||
-d '{"createdBy":"per-cluster-verify"}'
|
||||
```
|
||||
Expect `202` + a `deploymentId`.
|
||||
4. Confirm scoping in the driver logs — central applies the Galaxy driver, sites apply empty:
|
||||
```bash
|
||||
docker compose -f docker-dev/docker-compose.yml logs central-1 | grep -iE "applied deployment|galaxy|materialis"
|
||||
docker compose -f docker-dev/docker-compose.yml logs site-a-1 | grep -iE "applied deployment|galaxy|materialis"
|
||||
```
|
||||
Expect `central-1` to materialise the MAIN Galaxy tags; `site-a-1` to apply with **no** Galaxy/MAIN nodes.
|
||||
5. Browse-check with the Client CLI:
|
||||
```bash
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- browse -u opc.tcp://localhost:4840 -r -d 4 # central: Galaxy tree
|
||||
dotnet run --project src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI -- browse -u opc.tcp://localhost:4842 -r -d 4 # site-a: empty (no MAIN tree)
|
||||
```
|
||||
Expect the MAIN Galaxy hierarchy on `:4840` and an **empty** address space on `:4842` (NOT the merged tree).
|
||||
|
||||
**Acceptance:** the rig boots as one mesh; a single deploy populates the central
|
||||
OPC UA tree and leaves the site nodes empty (proving per-ClusterId scoping live).
|
||||
If anything regresses, stop and debug (do not paper over).
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Update docker-dev docs + memory
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 9
|
||||
|
||||
**Blocked by:** Task 6 (final topology).
|
||||
|
||||
**Files:**
|
||||
- Modify: `CLAUDE.md` (the Docker Workflow / docker-dev references that mention the cluster layout)
|
||||
- Modify: `docs/v2/dev-environment.md` (if it describes the three-isolated-mesh topology)
|
||||
- Modify: `/Users/dohertj2/.claude/projects/-Users-dohertj2-Desktop-OtOpcUa/memory/project_dev_environment.md` + `MEMORY.md` pointer
|
||||
|
||||
**Steps:**
|
||||
1. In `CLAUDE.md` and `docs/v2/dev-environment.md`, update any description of the
|
||||
docker-dev topology from "three isolated meshes / MAIN admin-a+admin-b /
|
||||
site fused" to "single mesh, hub-and-spoke: `central-1`/`central-2` fused
|
||||
admin+driver own the only UI + deploy singleton; `site-*` are driver-only
|
||||
members scoped by `ClusterId`; central UI at `:9200` deploys to all." Update
|
||||
the OPC UA endpoint list (`central-1` `:4840`, `central-2` `:4841`, sites
|
||||
`:4842`–`:4845`).
|
||||
2. Update the `project_dev_environment.md` memory's docker-dev section to match
|
||||
(node names, hub-and-spoke, "central UI deploys to all clusters"). Keep the
|
||||
one-line `MEMORY.md` pointer accurate.
|
||||
|
||||
**Commit:**
|
||||
```bash
|
||||
git add CLAUDE.md docs/v2/dev-environment.md
|
||||
git commit -m "docs(docker-dev): document single-mesh hub-and-spoke topology"
|
||||
```
|
||||
(The memory files live outside the repo — write them with the memory workflow, not git.)
|
||||
|
||||
**Acceptance:** docs + memory describe the new topology accurately.
|
||||
|
||||
---
|
||||
|
||||
## Done criteria
|
||||
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` clean.
|
||||
- `dotnet test tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests` green (new scoping
|
||||
tests + no regressions) and `...Host.IntegrationTests` multi-cluster test green.
|
||||
- The full pre-existing suite stays green (the `ClusterCount ≤ 1` lenient branch
|
||||
guarantees single-cluster behavior is unchanged).
|
||||
- `docker compose config` parses; the live rig (Task 9) shows central serving the
|
||||
Galaxy tree and sites empty under one global deploy.
|
||||
|
||||
## Out of scope (follow-ups)
|
||||
|
||||
- **Per-cluster deploy** (deploy just SITE-A from the UI) — coordinator + UI work.
|
||||
- **Seeding demo drivers on the sites** — added via the central UI.
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-07-per-cluster-scoping.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: ResolveClusterScope + node-scoped ParseDriverInstances", "status": "pending"},
|
||||
{"id": 2, "subject": "Task 2: Node-scoped ParseComposition (address-space filter)", "status": "pending", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: Wire driver-spawn + SubscribeBulk filtering into DriverHostActor", "status": "pending", "blockedBy": [1, 2]},
|
||||
{"id": 4, "subject": "Task 4: Wire scoped composition into OpcUaPublishActor.HandleRebuild", "status": "pending", "blockedBy": [2]},
|
||||
{"id": 5, "subject": "Task 5: Multi-cluster scoping E2E on the cluster harness", "status": "pending", "blockedBy": [3, 4]},
|
||||
{"id": 6, "subject": "Task 6: Rewrite docker-dev/docker-compose.yml for the single mesh", "status": "pending"},
|
||||
{"id": 7, "subject": "Task 7: Rewrite docker-dev/traefik-dynamic.yml (central-only route)", "status": "pending"},
|
||||
{"id": 8, "subject": "Task 8: Rewrite docker-dev/seed/seed-clusters.sql (MAIN nodes -> central-1/2)", "status": "pending"},
|
||||
{"id": 9, "subject": "Task 9: Live docker-dev verification", "status": "pending", "blockedBy": [3, 4, 5, 6, 7, 8]},
|
||||
{"id": 10, "subject": "Task 10: Update docker-dev docs + memory", "status": "pending", "blockedBy": [6]}
|
||||
],
|
||||
"lastUpdated": "2026-06-07"
|
||||
}
|
||||
@@ -30,10 +30,15 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
private readonly TimeSpan _applyDeadline;
|
||||
private readonly ILoggingAdapter _log = Context.GetLogger();
|
||||
private readonly Dictionary<NodeId, ApplyAckOutcome> _acks = new();
|
||||
// NodeId equality here is case-insensitive (by Value) to match the case-insensitive ClusterId/
|
||||
// NodeId scoping in DeploymentArtifact.ResolveClusterScope — so an ack from a node whose address
|
||||
// differs only in case still matches its expected-ack entry (SQL collation + DNS are
|
||||
// case-insensitive, so the same node can surface with different casing).
|
||||
private static readonly IEqualityComparer<NodeId> NodeIdComparer = new CaseInsensitiveNodeIdComparer();
|
||||
private readonly Dictionary<NodeId, ApplyAckOutcome> _acks = new(NodeIdComparer);
|
||||
|
||||
private DeploymentId? _current;
|
||||
private HashSet<NodeId> _expectedAcks = new();
|
||||
private HashSet<NodeId> _expectedAcks = new(NodeIdComparer);
|
||||
|
||||
/// <summary>Gets the timer scheduler for managing apply deadlines.</summary>
|
||||
public ITimerScheduler Timers { get; set; } = null!;
|
||||
@@ -88,7 +93,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
.AsNoTracking()
|
||||
.ToList();
|
||||
|
||||
_expectedAcks = nodeStates.Select(s => NodeId.Parse(s.NodeId)).ToHashSet();
|
||||
_expectedAcks = nodeStates.Select(s => NodeId.Parse(s.NodeId)).ToHashSet(NodeIdComparer);
|
||||
foreach (var s in nodeStates.Where(s => s.Status != NodeDeploymentStatus.Applying))
|
||||
_acks[NodeId.Parse(s.NodeId)] = s.Status == NodeDeploymentStatus.Applied
|
||||
? ApplyAckOutcome.Applied
|
||||
@@ -248,7 +253,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
private HashSet<NodeId> DiscoverDriverNodes()
|
||||
{
|
||||
var cluster = Akka.Cluster.Cluster.Get(Context.System);
|
||||
var nodes = new HashSet<NodeId>();
|
||||
var nodes = new HashSet<NodeId>(NodeIdComparer);
|
||||
foreach (var member in cluster.State.Members)
|
||||
{
|
||||
if (member.Status is not (MemberStatus.Up or MemberStatus.Joining)) continue;
|
||||
@@ -261,4 +266,18 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
/// <summary>Case-insensitive <see cref="NodeId"/> equality (by <see cref="NodeId.Value"/>),
|
||||
/// matching the case-insensitive scoping in <c>DeploymentArtifact.ResolveClusterScope</c> so the
|
||||
/// expected-ack set and incoming acks agree regardless of host-name casing.</summary>
|
||||
private sealed class CaseInsensitiveNodeIdComparer : IEqualityComparer<NodeId>
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public bool Equals(NodeId x, NodeId y) =>
|
||||
string.Equals(x.Value, y.Value, StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
/// <inheritdoc />
|
||||
public int GetHashCode(NodeId obj) =>
|
||||
StringComparer.OrdinalIgnoreCase.GetHashCode(obj.Value ?? string.Empty);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,27 @@ public sealed record DriverInstanceSpec(
|
||||
string DriverConfig,
|
||||
string? ClusterId = null);
|
||||
|
||||
/// <summary>How a node should scope a deployment artifact to its own ClusterId.</summary>
|
||||
public enum ClusterFilterMode
|
||||
{
|
||||
/// <summary>Apply everything (single-cluster / legacy deployments).</summary>
|
||||
None,
|
||||
|
||||
/// <summary>Filter the artifact to the node's own ClusterId.</summary>
|
||||
ScopeTo,
|
||||
|
||||
/// <summary>Apply nothing (node's cluster row not found in a multi-cluster artifact).</summary>
|
||||
Suppress,
|
||||
}
|
||||
|
||||
/// <summary>Resolved scoping decision for a node against an artifact.</summary>
|
||||
/// <param name="Mode">
|
||||
/// None = apply everything (single-cluster / legacy); ScopeTo = filter to <paramref name="ClusterId"/>;
|
||||
/// Suppress = apply nothing.
|
||||
/// </param>
|
||||
/// <param name="ClusterId">The node's ClusterId when <paramref name="Mode"/> is ScopeTo; otherwise null.</param>
|
||||
public readonly record struct ClusterScope(ClusterFilterMode Mode, string? ClusterId);
|
||||
|
||||
public static class DeploymentArtifact
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
@@ -58,6 +79,70 @@ public static class DeploymentArtifact
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolve how a node should scope a deployment artifact to its own ClusterId. Single-cluster
|
||||
/// (or legacy) artifacts resolve to <see cref="ClusterFilterMode.None"/> so every existing
|
||||
/// deployment applies unchanged. In a multi-cluster artifact the node's <c>ClusterNode</c> row
|
||||
/// (matched by <paramref name="nodeId"/>) selects <see cref="ClusterFilterMode.ScopeTo"/> its
|
||||
/// ClusterId; a missing row resolves to <see cref="ClusterFilterMode.Suppress"/>. Empty /
|
||||
/// malformed blobs resolve to <see cref="ClusterFilterMode.None"/> (lenient, matching the
|
||||
/// other parsers).
|
||||
/// </summary>
|
||||
/// <param name="blob">The deployment artifact blob to inspect.</param>
|
||||
/// <param name="nodeId">The node's identity (e.g. "central-1:4053") to match against <c>Nodes</c>.</param>
|
||||
/// <returns>The resolved <see cref="ClusterScope"/> decision for this node.</returns>
|
||||
public static ClusterScope ResolveClusterScope(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
if (blob.IsEmpty) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
var clusterCount = root.TryGetProperty("Clusters", out var cl) && cl.ValueKind == JsonValueKind.Array
|
||||
? cl.GetArrayLength() : 0;
|
||||
if (clusterCount <= 1) return new ClusterScope(ClusterFilterMode.None, null);
|
||||
|
||||
string? myCluster = null;
|
||||
if (root.TryGetProperty("Nodes", out var nodes) && nodes.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in nodes.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var nid = el.TryGetProperty("NodeId", out var nEl) ? nEl.GetString() : null;
|
||||
if (!string.Equals(nid, nodeId, StringComparison.OrdinalIgnoreCase)) continue;
|
||||
myCluster = el.TryGetProperty("ClusterId", out var cEl) ? cEl.GetString() : null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return string.IsNullOrWhiteSpace(myCluster)
|
||||
? new ClusterScope(ClusterFilterMode.Suppress, null)
|
||||
: new ClusterScope(ClusterFilterMode.ScopeTo, myCluster);
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
return new ClusterScope(ClusterFilterMode.None, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse a deployment artifact blob into the driver-instance specs a specific node should
|
||||
/// spawn, scoped to its own ClusterId via <see cref="ResolveClusterScope"/>. Single-cluster /
|
||||
/// legacy artifacts return every spec; a multi-cluster artifact returns only the matching
|
||||
/// cluster's specs (or none when the node's row is absent).
|
||||
/// </summary>
|
||||
/// <param name="blob">The deployment artifact blob to parse.</param>
|
||||
/// <param name="nodeId">The node's identity (e.g. "central-1:4053") used to resolve cluster scope.</param>
|
||||
/// <returns>The driver-instance specs this node should spawn.</returns>
|
||||
public static IReadOnlyList<DriverInstanceSpec> ParseDriverInstances(ReadOnlySpan<byte> blob, string nodeId)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
if (scope.Mode == ClusterFilterMode.Suppress) return Array.Empty<DriverInstanceSpec>();
|
||||
var all = ParseDriverInstances(blob);
|
||||
return scope.Mode == ClusterFilterMode.ScopeTo
|
||||
? all.Where(s => string.Equals(s.ClusterId, scope.ClusterId, StringComparison.OrdinalIgnoreCase)).ToArray()
|
||||
: all;
|
||||
}
|
||||
|
||||
private static DriverInstanceSpec? TryReadSpec(JsonElement el)
|
||||
{
|
||||
var rowId = el.TryGetProperty("DriverInstanceRowId", out var rowEl)
|
||||
@@ -120,6 +205,115 @@ public static class DeploymentArtifact
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Cluster-scoped overload: the address-space composition a node should materialise given
|
||||
/// its NodeId. Filters every projection to the node's own ClusterId (see <see cref="ResolveClusterScope"/>).
|
||||
/// When <paramref name="onInconsistency"/> is supplied it is invoked with a human-readable message for each
|
||||
/// kept equipment whose owning UNS line is NOT in the node's cluster — a cross-cluster binding that
|
||||
/// violates the same-cluster invariant (decision #122) and would orphan the equipment folder. This is
|
||||
/// detection only (observability); the equipment is still returned, since the upstream draft validator
|
||||
/// is the authority that should prevent the binding in the first place.</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="nodeId">This node's identity in "host:port" form.</param>
|
||||
/// <param name="onInconsistency">Optional diagnostic callback for cross-cluster orphan bindings; null disables the check.</param>
|
||||
/// <returns>The filtered composition per the node's scoping decision.</returns>
|
||||
public static Phase7CompositionResult ParseComposition(
|
||||
ReadOnlySpan<byte> blob, string nodeId, Action<string>? onInconsistency = null)
|
||||
{
|
||||
var scope = ResolveClusterScope(blob, nodeId);
|
||||
if (scope.Mode == ClusterFilterMode.None) return ParseComposition(blob);
|
||||
if (scope.Mode == ClusterFilterMode.Suppress) return Empty();
|
||||
|
||||
var full = ParseComposition(blob);
|
||||
var sets = BuildClusterSets(blob, scope.ClusterId!);
|
||||
|
||||
var keptLines = full.UnsLines.Where(l => sets.AreaIds.Contains(l.UnsAreaId)).ToArray();
|
||||
var keptEquipment = full.EquipmentNodes.Where(e => sets.EquipmentIds.Contains(e.EquipmentId)).ToArray();
|
||||
|
||||
if (onInconsistency is not null)
|
||||
{
|
||||
var keptLineIds = keptLines.Select(l => l.UnsLineId).ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var e in keptEquipment)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(e.UnsLineId) && !keptLineIds.Contains(e.UnsLineId))
|
||||
onInconsistency(
|
||||
$"equipment '{e.EquipmentId}' is in cluster '{scope.ClusterId}' (by its driver) but its " +
|
||||
$"UNS line '{e.UnsLineId}' is not — a cross-cluster binding violates the same-cluster " +
|
||||
"invariant (decision #122) and would orphan the equipment folder.");
|
||||
}
|
||||
}
|
||||
|
||||
return new Phase7CompositionResult(
|
||||
full.UnsAreas.Where(a => sets.AreaIds.Contains(a.UnsAreaId)).ToArray(),
|
||||
keptLines,
|
||||
keptEquipment,
|
||||
full.DriverInstancePlans.Where(d => sets.DriverIds.Contains(d.DriverInstanceId)).ToArray(),
|
||||
full.ScriptedAlarmPlans.Where(a => sets.EquipmentIds.Contains(a.EquipmentId)).ToArray(),
|
||||
full.GalaxyTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray())
|
||||
{
|
||||
EquipmentTags = full.EquipmentTags.Where(t => sets.DriverIds.Contains(t.DriverInstanceId)).ToArray(),
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>The in-cluster id sets used to filter a composition.</summary>
|
||||
/// <param name="DriverIds">DriverInstanceIds whose row carries the in-scope ClusterId.</param>
|
||||
/// <param name="AreaIds">UnsAreaIds whose row carries the in-scope ClusterId.</param>
|
||||
/// <param name="EquipmentIds">EquipmentIds whose owning DriverInstanceId is in-cluster.</param>
|
||||
private sealed record ClusterSets(HashSet<string> DriverIds, HashSet<string> AreaIds, HashSet<string> EquipmentIds);
|
||||
|
||||
/// <summary>Build the in-cluster id sets used to filter a composition: DriverInstanceIds + UnsAreaIds
|
||||
/// that directly carry the ClusterId, plus EquipmentIds whose DriverInstanceId is in-cluster.</summary>
|
||||
/// <param name="blob">The deployment artifact blob.</param>
|
||||
/// <param name="clusterId">The node's ClusterId to scope to.</param>
|
||||
/// <returns>The resolved in-cluster id sets (empty on parse failure => empty composition).</returns>
|
||||
private static ClusterSets BuildClusterSets(ReadOnlySpan<byte> blob, string clusterId)
|
||||
{
|
||||
var driverIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var areaIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
var equipmentIds = new HashSet<string>(StringComparer.Ordinal);
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(blob.ToArray());
|
||||
var root = doc.RootElement;
|
||||
CollectIdsWhereCluster(root, "DriverInstances", "DriverInstanceId", clusterId, driverIds);
|
||||
CollectIdsWhereCluster(root, "UnsAreas", "UnsAreaId", clusterId, areaIds);
|
||||
// Equipment carries no ClusterId — include it when its DriverInstanceId is in-cluster.
|
||||
if (root.TryGetProperty("Equipment", out var eq) && eq.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var el in eq.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var di = el.TryGetProperty("DriverInstanceId", out var diEl) ? diEl.GetString() : null;
|
||||
var id = el.TryGetProperty("EquipmentId", out var idEl) ? idEl.GetString() : null;
|
||||
if (!string.IsNullOrWhiteSpace(id) && di is not null && driverIds.Contains(di))
|
||||
equipmentIds.Add(id!);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (JsonException) { /* empty sets => nothing matches => empty composition */ }
|
||||
return new ClusterSets(driverIds, areaIds, equipmentIds);
|
||||
}
|
||||
|
||||
/// <summary>Collect each row's <paramref name="idField"/> value from <paramref name="arrayName"/> whose
|
||||
/// ClusterId equals <paramref name="clusterId"/> (case-insensitive, matching the codebase convention).</summary>
|
||||
/// <param name="root">The artifact root element.</param>
|
||||
/// <param name="arrayName">The array property name to scan (e.g. "DriverInstances").</param>
|
||||
/// <param name="idField">The id field to collect from each in-cluster row.</param>
|
||||
/// <param name="clusterId">The ClusterId rows must match.</param>
|
||||
/// <param name="into">The set to collect matching ids into.</param>
|
||||
private static void CollectIdsWhereCluster(
|
||||
JsonElement root, string arrayName, string idField, string clusterId, HashSet<string> into)
|
||||
{
|
||||
if (!root.TryGetProperty(arrayName, out var arr) || arr.ValueKind != JsonValueKind.Array) return;
|
||||
foreach (var el in arr.EnumerateArray())
|
||||
{
|
||||
if (el.ValueKind != JsonValueKind.Object) continue;
|
||||
var cid = el.TryGetProperty("ClusterId", out var cEl) ? cEl.GetString() : null;
|
||||
if (!string.Equals(cid, clusterId, StringComparison.OrdinalIgnoreCase)) continue;
|
||||
var id = el.TryGetProperty(idField, out var idEl) ? idEl.GetString() : null;
|
||||
if (!string.IsNullOrWhiteSpace(id)) into.Add(id!);
|
||||
}
|
||||
}
|
||||
|
||||
private static Phase7CompositionResult Empty() => new(
|
||||
Array.Empty<UnsAreaProjection>(),
|
||||
Array.Empty<UnsLineProjection>(),
|
||||
|
||||
@@ -364,7 +364,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
return;
|
||||
}
|
||||
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob);
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(blob, _localNode.Value);
|
||||
var snapshots = _children.ToDictionary(
|
||||
kv => kv.Key,
|
||||
kv => new DriverChildSnapshot(kv.Value.DriverType, kv.Value.LastConfigJson),
|
||||
@@ -429,7 +429,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
Phase7CompositionResult composition;
|
||||
try
|
||||
{
|
||||
composition = DeploymentArtifact.ParseComposition(blob);
|
||||
composition = DeploymentArtifact.ParseComposition(blob, _localNode.Value);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
|
||||
@@ -209,7 +209,10 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
||||
var artifact = msg.DeploymentId is { } depId
|
||||
? LoadArtifact(depId)
|
||||
: LoadLatestArtifact();
|
||||
var composition = DeploymentArtifact.ParseComposition(artifact);
|
||||
var composition = _localNode is { } ln
|
||||
? DeploymentArtifact.ParseComposition(artifact, ln.Value,
|
||||
inconsistency => _log.Warning("OpcUaPublish {Node}: cross-cluster binding — {Message}", ln, inconsistency))
|
||||
: DeploymentArtifact.ParseComposition(artifact);
|
||||
var plan = Phase7Planner.Compute(_lastApplied, composition);
|
||||
|
||||
if (plan.IsEmpty)
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
|
||||
/// <summary>
|
||||
/// End-to-end multi-cluster scoping over the real 2-node Akka cluster: two driver nodes are bound
|
||||
/// to DIFFERENT logical clusters (MAIN + SITE-A) via their <see cref="ClusterNode"/> rows, then a
|
||||
/// SINGLE deployment is composed + broadcast through the real
|
||||
/// <c>AdminOperationsActor → ConfigPublishCoordinator → DriverHostActor</c> path.
|
||||
///
|
||||
/// <para>This proves the full deploy path applies per-ClusterId scoping that the actor-level test
|
||||
/// (<c>DriverHostActor_spawns_only_its_clusters_drivers</c>) covers in isolation: the node in MAIN
|
||||
/// ends up hosting ONLY the MAIN driver, the node in SITE-A ONLY the SITE-A driver, and the
|
||||
/// deployment still CONVERGES (seals) even though each node applies only a 1-driver slice — the ack
|
||||
/// fires unconditionally regardless of slice size.</para>
|
||||
///
|
||||
/// <para>The harness wires <c>NullDriverFactory</c> (no real transports), so each spawned driver is
|
||||
/// stubbed; we assert presence + identity (not connectivity) via
|
||||
/// <see cref="IFleetDiagnosticsClient"/>, the same cross-node Ask path
|
||||
/// <c>FleetDiagnosticsRoundTripTests</c> uses.</para>
|
||||
/// </summary>
|
||||
public sealed class MultiClusterScopingTests
|
||||
{
|
||||
private const string MainCluster = "MAIN";
|
||||
private const string SiteACluster = "SITE-A";
|
||||
private const string MainDriverId = "main-modbus";
|
||||
private const string SiteADriverId = "sa-modbus";
|
||||
|
||||
private static CancellationToken Ct => TestContext.Current.CancellationToken;
|
||||
|
||||
/// <summary>
|
||||
/// Verifies a single deploy scopes drivers per node: MAIN's node hosts only the MAIN driver,
|
||||
/// SITE-A's node only the SITE-A driver, and both nodes reach Applied so the deployment seals.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Deploy_scopes_drivers_to_each_nodes_own_cluster()
|
||||
{
|
||||
await using var harness = await TwoNodeClusterHarness.StartAsync();
|
||||
|
||||
// Assign each cluster node to a DIFFERENT logical cluster. NodeId MUST equal the node's
|
||||
// ClusterRoleInfo.LocalNode (host:port) so the artifact's Nodes[] map resolves each node's
|
||||
// ClusterId at apply time.
|
||||
await SeedTwoClusterConfigAsync(harness, mainNodeId: harness.NodeANodeId, siteANodeId: harness.NodeBNodeId);
|
||||
|
||||
await using var scope = harness.NodeA.Services.CreateAsyncScope();
|
||||
var adminOps = scope.ServiceProvider.GetRequiredService<IAdminOperationsClient>();
|
||||
|
||||
var result = await adminOps.StartDeploymentAsync(createdBy: "alice@test", Ct);
|
||||
result.Outcome.ShouldBe(StartDeploymentOutcome.Accepted);
|
||||
var deploymentId = result.DeploymentId!.Value.Value;
|
||||
|
||||
// Convergence: both nodes ack Applied and the coordinator seals — even though each node
|
||||
// applied only a 1-driver slice of the 2-cluster artifact.
|
||||
await WaitForAsync(async () =>
|
||||
{
|
||||
await using var db = await harness.CreateConfigDbContextAsync();
|
||||
var d = await db.Deployments.AsNoTracking()
|
||||
.FirstOrDefaultAsync(d => d.DeploymentId == deploymentId, Ct);
|
||||
return d?.Status == DeploymentStatus.Sealed;
|
||||
}, TimeSpan.FromSeconds(20));
|
||||
|
||||
await using (var db = await harness.CreateConfigDbContextAsync())
|
||||
{
|
||||
var nodeStates = await db.NodeDeploymentStates.AsNoTracking()
|
||||
.Where(s => s.DeploymentId == deploymentId)
|
||||
.ToListAsync(Ct);
|
||||
nodeStates.Count.ShouldBe(2);
|
||||
nodeStates.ShouldAllBe(s => s.Status == NodeDeploymentStatus.Applied);
|
||||
}
|
||||
|
||||
// Per-node driver presence: each DriverHostActor spawned ONLY its own cluster's slice.
|
||||
// Poll (rather than single-shot Ask) so transient timing after Sealed doesn't flake.
|
||||
var diagnostics = scope.ServiceProvider.GetRequiredService<IFleetDiagnosticsClient>();
|
||||
|
||||
string[] mainDrivers = [];
|
||||
await WaitForAsync(async () =>
|
||||
{
|
||||
mainDrivers = await GetDriverNamesAsync(diagnostics, harness.NodeANodeId);
|
||||
return mainDrivers.SequenceEqual(new[] { MainDriverId });
|
||||
}, TimeSpan.FromSeconds(10));
|
||||
mainDrivers.ShouldBe(new[] { MainDriverId });
|
||||
|
||||
string[] siteADrivers = [];
|
||||
await WaitForAsync(async () =>
|
||||
{
|
||||
siteADrivers = await GetDriverNamesAsync(diagnostics, harness.NodeBNodeId);
|
||||
return siteADrivers.SequenceEqual(new[] { SiteADriverId });
|
||||
}, TimeSpan.FromSeconds(10));
|
||||
siteADrivers.ShouldBe(new[] { SiteADriverId });
|
||||
}
|
||||
|
||||
/// <summary>Asks a node's DriverHostActor (over the cluster) for the names of its spawned drivers.</summary>
|
||||
private static async Task<string[]> GetDriverNamesAsync(IFleetDiagnosticsClient diagnostics, string nodeId)
|
||||
{
|
||||
var snapshot = await diagnostics.GetDiagnosticsAsync(NodeId.Parse(nodeId), Ct);
|
||||
return snapshot.Drivers.Select(d => d.Name).OrderBy(n => n, StringComparer.Ordinal).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Seeds two single-node clusters (MAIN, SITE-A), one <see cref="ClusterNode"/> per cluster bound
|
||||
/// to the supplied <c>host:port</c> identities, and one <see cref="DriverInstance"/> per cluster.
|
||||
/// <see cref="ConfigComposer.SnapshotAndFlattenAsync"/> emits these straight into the artifact's
|
||||
/// <c>Clusters</c> / <c>Nodes</c> / <c>DriverInstances</c> arrays, giving each DriverHostActor the
|
||||
/// multi-cluster artifact <c>DeploymentArtifact.ResolveClusterScope</c> filters by NodeId.
|
||||
/// </summary>
|
||||
private static async Task SeedTwoClusterConfigAsync(TwoNodeClusterHarness harness, string mainNodeId, string siteANodeId)
|
||||
{
|
||||
await using var db = await harness.CreateConfigDbContextAsync();
|
||||
|
||||
db.ServerClusters.AddRange(
|
||||
NewCluster(MainCluster, "Main Cluster", "central"),
|
||||
NewCluster(SiteACluster, "Site A Cluster", "site-a"));
|
||||
|
||||
db.Namespaces.AddRange(
|
||||
NewNamespace(MainCluster, "MAIN-equipment", "urn:zb:central:equipment"),
|
||||
NewNamespace(SiteACluster, "SITE-A-equipment", "urn:zb:site-a:equipment"));
|
||||
|
||||
db.ClusterNodes.AddRange(
|
||||
NewNode(mainNodeId, MainCluster, "urn:zb:central:node-a"),
|
||||
NewNode(siteANodeId, SiteACluster, "urn:zb:site-a:node-b"));
|
||||
|
||||
db.DriverInstances.AddRange(
|
||||
NewDriver(MainDriverId, MainCluster, "MAIN-equipment"),
|
||||
NewDriver(SiteADriverId, SiteACluster, "SITE-A-equipment"));
|
||||
|
||||
await db.SaveChangesAsync(Ct);
|
||||
}
|
||||
|
||||
private static ServerCluster NewCluster(string clusterId, string name, string site) => new()
|
||||
{
|
||||
ClusterId = clusterId,
|
||||
Name = name,
|
||||
Enterprise = "zb",
|
||||
Site = site,
|
||||
NodeCount = 1,
|
||||
RedundancyMode = RedundancyMode.None,
|
||||
CreatedBy = "test",
|
||||
};
|
||||
|
||||
private static Namespace NewNamespace(string clusterId, string namespaceId, string uri) => new()
|
||||
{
|
||||
NamespaceId = namespaceId,
|
||||
ClusterId = clusterId,
|
||||
Kind = NamespaceKind.Equipment,
|
||||
NamespaceUri = uri,
|
||||
};
|
||||
|
||||
private static ClusterNode NewNode(string nodeId, string clusterId, string applicationUri) => new()
|
||||
{
|
||||
NodeId = nodeId,
|
||||
ClusterId = clusterId,
|
||||
Host = TwoNodeClusterHarness.LoopbackHost,
|
||||
ApplicationUri = applicationUri,
|
||||
CreatedBy = "test",
|
||||
};
|
||||
|
||||
private static DriverInstance NewDriver(string driverInstanceId, string clusterId, string namespaceId) => new()
|
||||
{
|
||||
DriverInstanceId = driverInstanceId,
|
||||
ClusterId = clusterId,
|
||||
NamespaceId = namespaceId,
|
||||
Name = driverInstanceId,
|
||||
DriverType = "ModbusTcp",
|
||||
Enabled = true,
|
||||
DriverConfig = "{}",
|
||||
};
|
||||
|
||||
private static async Task WaitForAsync(Func<Task<bool>> condition, TimeSpan timeout)
|
||||
{
|
||||
var deadline = DateTime.UtcNow + timeout;
|
||||
while (DateTime.UtcNow < deadline)
|
||||
{
|
||||
if (await condition()) return;
|
||||
await Task.Delay(200);
|
||||
}
|
||||
throw new TimeoutException($"Condition not met within {timeout}");
|
||||
}
|
||||
}
|
||||
@@ -90,6 +90,23 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
|
||||
/// <summary>Gets the Akka ActorSystem for node B.</summary>
|
||||
public ActorSystem NodeBSystem => NodeB.Services.GetRequiredService<ActorSystem>();
|
||||
|
||||
/// <summary>
|
||||
/// The <c>host:port</c> identity each node's <c>DriverHostActor</c> / <c>NodeDeploymentState</c>
|
||||
/// row uses, derived the same way <c>ClusterRoleInfo</c> derives <c>LocalNode</c> from
|
||||
/// <c>Cluster:PublicHostname</c> + <c>Cluster:Port</c>. Seed a <c>ClusterNode.NodeId</c> with this
|
||||
/// value to bind a node to a logical ClusterId for multi-cluster scoping tests.
|
||||
/// </summary>
|
||||
public string NodeANodeId => $"{LoopbackHost}:{NodeAAkkaPort}";
|
||||
|
||||
/// <inheritdoc cref="NodeANodeId"/>
|
||||
public string NodeBNodeId => $"{LoopbackHost}:{NodeBAkkaPort}";
|
||||
|
||||
/// <summary>Opens a new <see cref="OtOpcUaConfigDbContext"/> over the shared ConfigDb (the same
|
||||
/// store both nodes read) so a test can seed clusters/nodes/drivers before triggering a deploy.</summary>
|
||||
/// <returns>A new DbContext the caller is responsible for disposing.</returns>
|
||||
public Task<OtOpcUaConfigDbContext> CreateConfigDbContextAsync()
|
||||
=> NodeA.Services.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>().CreateDbContextAsync();
|
||||
|
||||
/// <summary>Boots both nodes and waits up to <paramref name="formationTimeout"/> for cluster convergence.</summary>
|
||||
/// <param name="formationTimeout">Maximum time to wait for cluster formation; defaults to 20 seconds if not provided.</param>
|
||||
public static async Task<TwoNodeClusterHarness> StartAsync(TimeSpan? formationTimeout = null)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Shouldly;
|
||||
@@ -8,6 +9,78 @@ namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
|
||||
|
||||
public sealed class DeploymentArtifactTests
|
||||
{
|
||||
private static byte[] BlobOf(object snapshot) =>
|
||||
System.Text.Json.JsonSerializer.SerializeToUtf8Bytes(snapshot);
|
||||
|
||||
private static object MultiClusterSnapshot() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "main-galaxy", Name = "g", DriverType = "GalaxyMxGateway", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "sa-modbus", Name = "m", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
};
|
||||
|
||||
/// <summary>Verifies a single-cluster artifact resolves to None (apply everything).</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_single_cluster_artifact_returns_None()
|
||||
{
|
||||
var blob = BlobOf(new { Clusters = new[] { new { ClusterId = "MAIN" } }, Nodes = Array.Empty<object>() });
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(blob, "central-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.None);
|
||||
}
|
||||
|
||||
/// <summary>Verifies a multi-cluster artifact scopes a known node to its own ClusterId.</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_known_node_scopes_to_its_cluster()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "site-a-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.ScopeTo);
|
||||
scope.ClusterId.ShouldBe("SITE-A");
|
||||
}
|
||||
|
||||
/// <summary>Verifies a multi-cluster artifact suppresses an unknown node.</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_multi_cluster_unknown_node_suppresses()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.Suppress);
|
||||
}
|
||||
|
||||
/// <summary>Verifies the scoped parse returns only the node's own cluster's drivers.</summary>
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_returns_only_my_clusters_drivers()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "central-1:4053");
|
||||
specs.Select(s => s.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
}
|
||||
|
||||
/// <summary>Verifies the scoped parse returns nothing for an unknown node.</summary>
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_unknown_node_returns_empty()
|
||||
{
|
||||
var specs = DeploymentArtifact.ParseDriverInstances(BlobOf(MultiClusterSnapshot()), "ghost-9:4053");
|
||||
specs.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
/// <summary>Verifies the scoped parse returns all drivers for a single-cluster artifact.</summary>
|
||||
[Fact]
|
||||
public void ParseDriverInstances_scoped_single_cluster_returns_all()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceRowId = Guid.NewGuid(), DriverInstanceId = "d1", Name = "d", DriverType = "Modbus", Enabled = true, DriverConfig = "{}", ClusterId = "MAIN" } },
|
||||
});
|
||||
DeploymentArtifact.ParseDriverInstances(blob, "anything:4053").Select(s => s.DriverInstanceId).ShouldBe(new[] { "d1" });
|
||||
}
|
||||
/// <summary>Verifies that empty blob returns empty list.</summary>
|
||||
[Fact]
|
||||
public void Empty_blob_returns_empty_list()
|
||||
@@ -222,4 +295,145 @@ public sealed class DeploymentArtifactTests
|
||||
|
||||
specs.Single().DriverInstanceId.ShouldBe("DI-ok");
|
||||
}
|
||||
|
||||
/// <summary>Verifies that a malformed blob resolves to None rather than throwing.</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_malformed_blob_returns_None()
|
||||
{
|
||||
var scope = DeploymentArtifact.ResolveClusterScope("not json"u8.ToArray(), "central-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.None);
|
||||
}
|
||||
|
||||
/// <summary>Verifies that a blank ClusterId in the node row resolves to Suppress.</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_blank_cluster_id_suppresses()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[] { new { NodeId = "central-1:4053", ClusterId = "" } },
|
||||
});
|
||||
DeploymentArtifact.ResolveClusterScope(blob, "central-1:4053").Mode.ShouldBe(ClusterFilterMode.Suppress);
|
||||
}
|
||||
|
||||
/// <summary>Verifies that NodeId matching in ResolveClusterScope is case-insensitive.</summary>
|
||||
[Fact]
|
||||
public void ResolveClusterScope_node_id_match_is_case_insensitive()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[] { new { NodeId = "Central-1:4053", ClusterId = "MAIN" } },
|
||||
});
|
||||
var scope = DeploymentArtifact.ResolveClusterScope(blob, "central-1:4053");
|
||||
scope.Mode.ShouldBe(ClusterFilterMode.ScopeTo);
|
||||
scope.ClusterId.ShouldBe("MAIN");
|
||||
}
|
||||
|
||||
private static object MultiClusterSnapshotWithTags() => new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceId = "main-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceId = "sa-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
Namespaces = new[]
|
||||
{
|
||||
new { NamespaceId = "main-ns", ClusterId = "MAIN", Kind = 1 },
|
||||
new { NamespaceId = "sa-ns", ClusterId = "SITE-A", Kind = 1 },
|
||||
},
|
||||
Tags = new[]
|
||||
{
|
||||
new { TagId = "t-main", DriverInstanceId = "main-galaxy", EquipmentId = (string?)null, Name = "M1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
new { TagId = "t-sa", DriverInstanceId = "sa-galaxy", EquipmentId = (string?)null, Name = "S1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
},
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_keeps_only_my_clusters_drivers_and_tags()
|
||||
{
|
||||
var blob = BlobOf(MultiClusterSnapshotWithTags());
|
||||
|
||||
var main = DeploymentArtifact.ParseComposition(blob, "central-1:4053");
|
||||
main.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "main-galaxy" });
|
||||
main.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-main" });
|
||||
|
||||
var siteA = DeploymentArtifact.ParseComposition(blob, "site-a-1:4053");
|
||||
siteA.DriverInstancePlans.Select(d => d.DriverInstanceId).ShouldBe(new[] { "sa-galaxy" });
|
||||
siteA.GalaxyTags.Select(t => t.TagId).ShouldBe(new[] { "t-sa" });
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_unknown_node_is_empty()
|
||||
{
|
||||
var comp = DeploymentArtifact.ParseComposition(BlobOf(MultiClusterSnapshotWithTags()), "ghost-9:4053");
|
||||
comp.GalaxyTags.ShouldBeEmpty();
|
||||
comp.DriverInstancePlans.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ParseComposition_single_cluster_node_id_overload_matches_legacy()
|
||||
{
|
||||
var blob = BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" } },
|
||||
Nodes = new[] { new { NodeId = "n1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[] { new { DriverInstanceId = "d1", DriverType = "Modbus", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "ns" } },
|
||||
});
|
||||
DeploymentArtifact.ParseComposition(blob, "anything:4053").DriverInstancePlans.Count
|
||||
.ShouldBe(DeploymentArtifact.ParseComposition(blob).DriverInstancePlans.Count);
|
||||
}
|
||||
|
||||
/// <summary>An artifact where an equipment's driver is in the node's cluster but its UNS line's area
|
||||
/// is in another cluster: <paramref name="areaCluster"/> controls the area's ClusterId.</summary>
|
||||
private static byte[] OrphanEquipmentBlob(string areaCluster) => BlobOf(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[] { new { NodeId = "central-1:4053", ClusterId = "MAIN" } },
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceId = "main-driver", DriverType = "Modbus", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
},
|
||||
UnsAreas = new[] { new { UnsAreaId = "area-1", Name = "Area1", ClusterId = areaCluster } },
|
||||
UnsLines = new[] { new { UnsLineId = "line-1", UnsAreaId = "area-1", Name = "Line1" } },
|
||||
Equipment = new[]
|
||||
{
|
||||
new { EquipmentId = "equip-1", Name = "Equip1", UnsLineId = "line-1", DriverInstanceId = "main-driver" },
|
||||
},
|
||||
});
|
||||
|
||||
/// <summary>Verifies the inconsistency callback fires when a kept equipment's UNS line belongs to
|
||||
/// another cluster (a cross-cluster orphan binding).</summary>
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_flags_cross_cluster_orphan_equipment()
|
||||
{
|
||||
var warnings = new List<string>();
|
||||
var comp = DeploymentArtifact.ParseComposition(
|
||||
OrphanEquipmentBlob(areaCluster: "SITE-A"), "central-1:4053", warnings.Add);
|
||||
|
||||
comp.EquipmentNodes.Select(e => e.EquipmentId).ShouldBe(new[] { "equip-1" }); // still returned
|
||||
warnings.Count.ShouldBe(1);
|
||||
warnings[0].ShouldContain("equip-1");
|
||||
warnings[0].ShouldContain("line-1");
|
||||
}
|
||||
|
||||
/// <summary>Verifies the inconsistency callback does NOT fire when the equipment, its driver, and its
|
||||
/// UNS line/area are all in the same cluster (the normal, invariant-respecting case).</summary>
|
||||
[Fact]
|
||||
public void ParseComposition_scoped_consistent_equipment_does_not_warn()
|
||||
{
|
||||
var warnings = new List<string>();
|
||||
var comp = DeploymentArtifact.ParseComposition(
|
||||
OrphanEquipmentBlob(areaCluster: "MAIN"), "central-1:4053", warnings.Add);
|
||||
|
||||
comp.EquipmentNodes.Select(e => e.EquipmentId).ShouldBe(new[] { "equip-1" });
|
||||
comp.UnsLines.Select(l => l.UnsLineId).ShouldBe(new[] { "line-1" });
|
||||
warnings.ShouldBeEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -132,6 +132,86 @@ public sealed class DriverHostActorReconcileTests : RuntimeActorTestBase
|
||||
snap.Drivers.Count.ShouldBe(1);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies per-ClusterId scoping at the actor level: a 2-cluster artifact (MAIN + SITE-A,
|
||||
/// one driver each) dispatched to a node whose ClusterNode row puts it in SITE-A spawns ONLY
|
||||
/// the SITE-A driver — and the node still reaches Applied (the ack fires unconditionally even
|
||||
/// when a node's cluster slice is empty).
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void DriverHostActor_spawns_only_its_clusters_drivers()
|
||||
{
|
||||
var db = NewInMemoryDbFactory();
|
||||
var factory = new CountingDriverFactory("Modbus");
|
||||
// Both drivers are Modbus so the factory could create either — scoping, not type support,
|
||||
// must be what excludes the MAIN driver.
|
||||
var deploymentId = SeedMultiClusterDeployment(db, RevA,
|
||||
("main-modbus", "Modbus", "{}", true, "MAIN"),
|
||||
("sa-modbus", "Modbus", "{}", true, "SITE-A"));
|
||||
|
||||
// This node belongs to SITE-A per the Nodes (ClusterNode) rows.
|
||||
var siteANode = NodeId.Parse("site-a-1:4053");
|
||||
var coordinator = CreateTestProbe();
|
||||
var actor = Sys.ActorOf(DriverHostActor.Props(
|
||||
db, siteANode, coordinator.Ref,
|
||||
driverFactory: factory,
|
||||
localRoles: new HashSet<string> { "driver" }));
|
||||
|
||||
actor.Tell(new DispatchDeployment(deploymentId, RevA, CorrelationId.NewId()));
|
||||
|
||||
// The node still reaches Applied even though it hosts only its own cluster's slice.
|
||||
coordinator.ExpectMsg<ApplyAck>(TimeSpan.FromSeconds(5)).Outcome.ShouldBe(ApplyAckOutcome.Applied);
|
||||
|
||||
// Only the SITE-A driver was constructed — the MAIN driver was filtered out by scoping.
|
||||
AwaitAssert(() => factory.CreateCount.ShouldBe(1), duration: TimeSpan.FromSeconds(3));
|
||||
|
||||
actor.Tell(new GetDiagnostics(CorrelationId.NewId()), coordinator.Ref);
|
||||
var snap = coordinator.ExpectMsg<Commons.Interfaces.NodeDiagnosticsSnapshot>(TimeSpan.FromSeconds(2));
|
||||
snap.Drivers.Count.ShouldBe(1);
|
||||
snap.Drivers[0].Name.ShouldBe("sa-modbus");
|
||||
}
|
||||
|
||||
private static DeploymentId SeedMultiClusterDeployment(
|
||||
IDbContextFactory<OtOpcUaConfigDbContext> db,
|
||||
RevisionHash rev,
|
||||
params (string Id, string Type, string Config, bool Enabled, string ClusterId)[] drivers)
|
||||
{
|
||||
var artifact = JsonSerializer.SerializeToUtf8Bytes(new
|
||||
{
|
||||
// >1 cluster + matching Nodes rows triggers ScopeTo (single-cluster would resolve to None).
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = drivers.Select(d => new
|
||||
{
|
||||
DriverInstanceRowId = Guid.NewGuid(),
|
||||
DriverInstanceId = d.Id,
|
||||
Name = d.Id,
|
||||
DriverType = d.Type,
|
||||
Enabled = d.Enabled,
|
||||
DriverConfig = d.Config,
|
||||
ClusterId = d.ClusterId,
|
||||
}).ToArray(),
|
||||
});
|
||||
|
||||
var id = DeploymentId.NewId();
|
||||
using var ctx = db.CreateDbContext();
|
||||
ctx.Deployments.Add(new Deployment
|
||||
{
|
||||
DeploymentId = id.Value,
|
||||
RevisionHash = rev.Value,
|
||||
Status = DeploymentStatus.Sealed,
|
||||
CreatedBy = "test",
|
||||
SealedAtUtc = DateTime.UtcNow,
|
||||
ArtifactBlob = artifact,
|
||||
});
|
||||
ctx.SaveChanges();
|
||||
return id;
|
||||
}
|
||||
|
||||
private static DeploymentId SeedDeploymentWithDrivers(
|
||||
IDbContextFactory<OtOpcUaConfigDbContext> db,
|
||||
RevisionHash rev,
|
||||
|
||||
@@ -98,6 +98,104 @@ public sealed class OpcUaPublishActorRebuildTests : RuntimeActorTestBase
|
||||
AwaitAssert(() => sink.RebuildCalls.ShouldBe(1), duration: TimeSpan.FromMilliseconds(500));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Wiring proof for per-ClusterId scoping (Task 4): a multi-cluster artifact must
|
||||
/// materialise ONLY the local node's cluster slice. Mirrors the multi-cluster artifact
|
||||
/// shape exercised in <c>DeploymentArtifactTests</c> (MAIN + SITE-A, one Galaxy driver +
|
||||
/// one SystemPlatform tag each). The scoped rebuild for the SITE-A node must surface the
|
||||
/// SITE-A tag (<c>t-sa</c> → variable <c>F.S1</c>) and NOT MAIN's (<c>t-main</c> →
|
||||
/// <c>F.M1</c>); the mirror holds for the MAIN node. Without the production scoping edit,
|
||||
/// the unscoped parse would materialise BOTH variables on every node.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Rebuild_materialises_only_the_nodes_cluster()
|
||||
{
|
||||
// --- SITE-A node: only the SITE-A tag's variable, never MAIN's. ---
|
||||
var dbA = NewInMemoryDbFactory();
|
||||
var sinkA = new RecordingSink();
|
||||
var applierA = new Phase7Applier(sinkA, NullLogger<Phase7Applier>.Instance);
|
||||
SeedMultiClusterDeployment(dbA);
|
||||
|
||||
var siteActor = Sys.ActorOf(OpcUaPublishActor.PropsForTests(
|
||||
sink: sinkA,
|
||||
dbFactory: dbA,
|
||||
applier: applierA,
|
||||
localNode: NodeId.Parse("site-a-1:4053")));
|
||||
|
||||
siteActor.Tell(new OpcUaPublishActor.RebuildAddressSpace(CorrelationId.NewId()));
|
||||
|
||||
AwaitAssert(() => sinkA.RebuildCalls.ShouldBe(1), duration: TimeSpan.FromSeconds(2));
|
||||
// t-sa (Name "S1", FolderPath "F") → MxAccessRef "F.S1" → variable node "F.S1".
|
||||
sinkA.Calls.ShouldContain("EV:F.S1");
|
||||
// t-main (MAIN cluster) must NOT leak onto the SITE-A node.
|
||||
sinkA.Calls.ShouldNotContain("EV:F.M1");
|
||||
|
||||
// --- MAIN node: the mirror — only MAIN's tag's variable, never SITE-A's. ---
|
||||
var dbM = NewInMemoryDbFactory();
|
||||
var sinkM = new RecordingSink();
|
||||
var applierM = new Phase7Applier(sinkM, NullLogger<Phase7Applier>.Instance);
|
||||
SeedMultiClusterDeployment(dbM);
|
||||
|
||||
var mainActor = Sys.ActorOf(OpcUaPublishActor.PropsForTests(
|
||||
sink: sinkM,
|
||||
dbFactory: dbM,
|
||||
applier: applierM,
|
||||
localNode: NodeId.Parse("central-1:4053")));
|
||||
|
||||
mainActor.Tell(new OpcUaPublishActor.RebuildAddressSpace(CorrelationId.NewId()));
|
||||
|
||||
AwaitAssert(() => sinkM.RebuildCalls.ShouldBe(1), duration: TimeSpan.FromSeconds(2));
|
||||
sinkM.Calls.ShouldContain("EV:F.M1");
|
||||
sinkM.Calls.ShouldNotContain("EV:F.S1");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Seal a 2-cluster deployment (MAIN + SITE-A) whose artifact mirrors the multi-cluster
|
||||
/// shape the composer emits: a <c>Clusters</c> + <c>Nodes</c> map, one SystemPlatform
|
||||
/// namespace + Galaxy driver + Galaxy tag per cluster. Used by
|
||||
/// <see cref="Rebuild_materialises_only_the_nodes_cluster"/>.
|
||||
/// </summary>
|
||||
private static void SeedMultiClusterDeployment(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory)
|
||||
{
|
||||
var artifact = JsonSerializer.SerializeToUtf8Bytes(new
|
||||
{
|
||||
Clusters = new[] { new { ClusterId = "MAIN" }, new { ClusterId = "SITE-A" } },
|
||||
Nodes = new[]
|
||||
{
|
||||
new { NodeId = "central-1:4053", ClusterId = "MAIN" },
|
||||
new { NodeId = "site-a-1:4053", ClusterId = "SITE-A" },
|
||||
},
|
||||
DriverInstances = new[]
|
||||
{
|
||||
new { DriverInstanceId = "main-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "MAIN", NamespaceId = "main-ns" },
|
||||
new { DriverInstanceId = "sa-galaxy", DriverType = "GalaxyMxGateway", DriverConfig = "{}", ClusterId = "SITE-A", NamespaceId = "sa-ns" },
|
||||
},
|
||||
Namespaces = new[]
|
||||
{
|
||||
new { NamespaceId = "main-ns", ClusterId = "MAIN", Kind = 1 }, // NamespaceKind.SystemPlatform
|
||||
new { NamespaceId = "sa-ns", ClusterId = "SITE-A", Kind = 1 },
|
||||
},
|
||||
Tags = new[]
|
||||
{
|
||||
new { TagId = "t-main", DriverInstanceId = "main-galaxy", EquipmentId = (string?)null, Name = "M1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
new { TagId = "t-sa", DriverInstanceId = "sa-galaxy", EquipmentId = (string?)null, Name = "S1", FolderPath = "F", DataType = "Boolean", TagConfig = "{}" },
|
||||
},
|
||||
ScriptedAlarms = Array.Empty<object>(),
|
||||
});
|
||||
|
||||
using var ctx = dbFactory.CreateDbContext();
|
||||
ctx.Deployments.Add(new Deployment
|
||||
{
|
||||
DeploymentId = Guid.NewGuid(),
|
||||
RevisionHash = new string('b', 64),
|
||||
Status = DeploymentStatus.Sealed,
|
||||
CreatedBy = "test",
|
||||
SealedAtUtc = DateTime.UtcNow,
|
||||
ArtifactBlob = artifact,
|
||||
});
|
||||
ctx.SaveChanges();
|
||||
}
|
||||
|
||||
private static void SeedDeployment(
|
||||
IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
|
||||
string[] equipmentIds,
|
||||
|
||||
Reference in New Issue
Block a user