41 Commits

Author SHA1 Message Date
Joseph Doherty a7dd2f59d0 docs(otopcua): record FixedTree live-validation on wonder-app-vd03 (Makino Z-34184)
v2-ci / build (push) Failing after 39s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
2026-06-26 16:02:05 -04:00
Joseph Doherty 04159fd716 test(otopcua): ConfigComposer->ParseComposition DeviceHost round-trip (follow-up E) 2026-06-26 15:39:40 -04:00
Joseph Doherty 1058542d80 docs(otopcua): record FixedTree follow-ups A-E as implemented (design, plan, RESUME) 2026-06-26 15:19:46 -04:00
Joseph Doherty 0074f37a64 test(otopcua): tighten multi-device collapse assertion + clear warn-state on removal (follow-up E) 2026-06-26 15:16:59 -04:00
Joseph Doherty 50f08635ec feat(otopcua): multi-device-per-driver FixedTree partition (follow-up E) 2026-06-26 15:00:11 -04:00
Joseph Doherty 51721df563 refactor(otopcua): extract authored-only send helper + empty-authored dropped-path test (follow-up D) 2026-06-26 14:44:26 -04:00
Joseph Doherty 05c820795a perf(otopcua): one SetDesiredSubscriptions per driver per redeploy (follow-up D) 2026-06-26 14:30:16 -04:00
Joseph Doherty cde16063d9 test(otopcua): negative + convergence coverage for rebind re-trigger (follow-up C) 2026-06-26 14:18:01 -04:00
Joseph Doherty 533671487e feat(otopcua): re-trigger discovery on config-unchanged rebind (follow-up C) 2026-06-26 14:06:50 -04:00
Joseph Doherty adcd7b57c1 feat(otopcua): driver-level equipment resolution + per-equipment discovered-plan cache (follow-up E) 2026-06-26 13:33:21 -04:00
Joseph Doherty 915492a759 refactor(otopcua): align device-host map parity + document EquipmentNode rebuild trade-off (follow-up E) 2026-06-26 13:22:26 -04:00
Joseph Doherty cb7ce7f171 feat(otopcua): EquipmentNode carries DriverInstanceId/DeviceId/DeviceHost (follow-up E projection) 2026-06-26 13:07:31 -04:00
Joseph Doherty e7d5ebe956 fix(otopcua): cancel pending rediscover timer on TriggerRediscovery + test hardening (follow-up C) 2026-06-26 12:57:08 -04:00
Joseph Doherty f7358bf4fd feat(otopcua): DriverInstanceActor.TriggerRediscovery message (follow-up C) 2026-06-26 12:45:26 -04:00
Joseph Doherty a1a655e6c9 test(otopcua): Once re-discovery reruns one pass per reconnect + comment tidy (follow-up B) 2026-06-26 12:38:52 -04:00
Joseph Doherty ce34816a50 feat(otopcua): DriverInstanceActor honors RediscoverPolicy (Never/Once/UntilStable) (follow-up B) 2026-06-26 12:32:28 -04:00
Joseph Doherty efbdaf853c feat(otopcua): set Modbus/S7/Galaxy re-discovery policy to Once + Once-branch test (follow-up B) 2026-06-26 12:26:28 -04:00
Joseph Doherty a378b572af feat(otopcua): add ITagDiscovery.RediscoverPolicy + per-driver assignments (follow-up B) 2026-06-26 12:18:44 -04:00
Joseph Doherty c2c368dcec feat(otopcua): make FixedTree re-discovery per-pass timeout injectable (follow-up A) 2026-06-26 12:12:47 -04:00
Joseph Doherty 37cac5dee5 docs(otopcua): record FixedTree-under-Equipment injection feature (design, plan, status) 2026-06-26 09:11:40 -04:00
Joseph Doherty 25ccd25b6b test(otopcua): assert exact discovered NodeId in the e2e 2026-06-26 09:04:26 -04:00
Joseph Doherty 5104540e32 test(otopcua): cover discovered-node rebind drop + clarify re-apply scope 2026-06-26 09:01:01 -04:00
Joseph Doherty 1aa13ebd27 test(otopcua): end-to-end discovered-node injection + value flow 2026-06-26 08:46:28 -04:00
Joseph Doherty 0788bad145 feat(otopcua): re-inject discovered nodes after address-space rebuild 2026-06-26 08:36:52 -04:00
Joseph Doherty b1e4fba792 fix(otopcua): skip redundant discovered-node re-apply + guard tests 2026-06-26 08:28:42 -04:00
Joseph Doherty 21298ec1b2 fix(otopcua): resume discovery on actor context + bound/harden re-discovery 2026-06-26 08:19:12 -04:00
Joseph Doherty b9b8d3d389 feat(otopcua): inject discovered nodes into the equipment projection on connect 2026-06-26 07:59:01 -04:00
Joseph Doherty cf6b1abf4c feat(otopcua): re-run driver discovery on reconnect 2026-06-26 07:44:28 -04:00
Joseph Doherty 51634cca38 feat(otopcua): driver-instance post-connect bounded re-discovery 2026-06-26 07:40:24 -04:00
Joseph Doherty bb21db0a8e fix(otopcua): log+document discovered-node materialise message 2026-06-26 07:32:13 -04:00
Joseph Doherty 34fc304712 fix(otopcua): guard discovered-injection equipment id + cover deferred forwarding 2026-06-26 07:27:09 -04:00
Joseph Doherty ccf93fc029 feat(otopcua): OpcUaPublishActor handles discovered-node materialisation 2026-06-26 07:24:22 -04:00
Joseph Doherty 598cdfad5a feat(otopcua): applier pass to materialise discovered nodes idempotently 2026-06-26 07:16:36 -04:00
Joseph Doherty f8406d348c fix(otopcua): report NodeAdded model-change outside the node Lock 2026-06-26 07:06:43 -04:00
Joseph Doherty 93f7586590 fix(otopcua): guard root-level discovered var parent + tighten mapper 2026-06-26 06:59:34 -04:00
Joseph Doherty 33b0e639a5 feat(otopcua): GeneralModelChangeEvent(NodeAdded) for runtime node adds 2026-06-26 06:55:52 -04:00
Joseph Doherty d7a0da5ea1 feat(otopcua): map discovered nodes under an equipment subfolder 2026-06-26 06:47:18 -04:00
Joseph Doherty da55c6913d feat(otopcua): capturing address-space builder for driver discovery 2026-06-26 06:39:18 -04:00
Joseph Doherty 74e07225c9 docs(otopcua): record the equipment-tag data-plane investigation + fix chain 2026-06-26 06:00:05 -04:00
Joseph Doherty 6600ce9940 feat(otopcua): bridge Akka actor logs into Serilog
DriverHostActor/DriverInstanceActor and cluster events log via Akka's ILoggingAdapter, which had no Serilog bridge — under the Windows service host the default StandardOutLogger output is discarded, making the driver-role actor graph invisible (this masked the data-plane diagnosis).

- Add Akka.Logger.Serilog 1.5.60 (deps satisfied by Akka 1.5.62 / Serilog 4.3.1).
- WithOtOpcUaClusterBootstrap: ConfigureLoggers(DebugLevel; ClearLoggers(); AddLogger<SerilogLogger>()) — Akka.Hosting owns logger setup, so HOCON akka.loggers alone is not honored.
- Program.cs: set static Serilog.Log.Logger from the DI root logger after Build() (AddZbSerilog registers the MEL provider but not the static logger, which the Akka SerilogLogger and the startup banner both need).
2026-06-26 06:00:05 -04:00
Joseph Doherty 235b8b8e6d fix(focas): serialize per-device wire I/O + bound reads; tolerate AdminUI config formats
Equipment tags were stuck at Bad_WaitingForInitialData on the deployed driver: the equipment poll, fixed-tree loop, probe and recycle shared one FOCAS/2 socket with no serialization, and the steady-state read had no timeout — concurrent reads collided and a stalled read hung forever, never overwriting the node's initial-data seed.

- SynchronizedFocasClient: per-device SemaphoreSlim gate + per-call timeout around every wire op (Connect/Probe gated, not double-bounded); wired in EnsureConnectedAsync. ReadAsync/WriteAsync map a per-call timeout to BadCommunicationError instead of rethrowing.
- FlexibleStringConverter on FOCAS config Series: the AdminUI persists the enum as a number ("series":6); accept number-or-string instead of throwing -> stub.
- FocasHostAddress.TryParse tolerates a scheme-less {ip}[:{port}] (AdminUI hostAddress form); canonical focas:// unchanged, malformed schemes still rejected.

247 FOCAS tests green; each fix has a regression test. Live-validated on wonder-app-vd03 (tags read Good).
2026-06-26 05:59:54 -04:00
63 changed files with 6696 additions and 31 deletions
+1
View File
@@ -8,6 +8,7 @@
<PackageVersion Include="Akka.Cluster.Hosting" Version="1.5.62" />
<PackageVersion Include="Akka.Cluster.Tools" Version="1.5.62" />
<PackageVersion Include="Akka.Hosting" Version="1.5.62" />
<PackageVersion Include="Akka.Logger.Serilog" Version="1.5.60" />
<PackageVersion Include="Akka.Remote" Version="1.5.62" />
<PackageVersion Include="Akka.Remote.Hosting" Version="1.5.62" />
<PackageVersion Include="Akka.Streams" Version="1.5.62" />
@@ -67,3 +67,33 @@ returning `Bad_WaitingForInitialData` until the rebuilt `ZB.MOM.WW.OtOpcUa.Drive
self-contained host publish) is deployed to `E:\ApiInstall\OtOpcUa\` and `OtOpcUaHost` is restarted. Once
redeployed, `parts-count`/`parts-required` should go Good (FixedTree + PMC/Parameter still pending the
follow-on v3 command work).
## FixedTree under the Equipment node (feature built 2026-06-26)
The FOCAS **FixedTree** (Identity / Axes / Spindle / Program / Timers) now surfaces under the equipment as
read-only value nodes, via a generic post-connect `ITagDiscovery` injection feature (branch
`feat/focas-fixedtree-equipment-injection`; design + plan at
[`docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](../plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md)
and [`…-injection.md`](../plans/2026-06-26-otopcua-fixedtree-equipment-injection.md)). After the driver
connects and its `FixedTreeCache` populates (~0–2 s), nodes are grafted at e.g.
`ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber` and `…/FOCAS/Axes/X/AbsolutePosition`, carrying live
values through the same path as the authored `parts-count`/`parts-required` tags, and survive redeploys.
**✅ LIVE-VALIDATED 2026-06-26.** A full self-contained Host overlay built from
`feat/focas-fixedtree-equipment-injection` @ `37cac5de` was deployed to `E:\ApiInstall\OtOpcUa`
(stop → full backup `E:\ApiInstall\OtOpcUa_bak-20260626111416` → robocopy overlay preserving
`appsettings*.json` + `pki\` → restart; `/healthz` 200 Healthy, member Up ADMIN+DRIVER, OPC `:4840`
listening). After the FOCAS driver reconnected, the host log recorded
`DriverHost …: injected 57 discovered node(s) for driver focas-z-34184 under EQ-3686c0272279` and
`AddressSpaceApplier: discovered nodes materialised under EQ-3686c0272279 (folders=14, vars=57)` —
no `Exception`/`NotSupportedException`. Verified via the OtOpcUa CLI from a remote client:
- `browse --recursive ns=2;s=EQ-3686c0272279` → new `FOCAS` subtree (`Identity`, `Axes` X/Y/Z/B/C/AA
+ `Actual`, `Spindle/S1`, `Program`, `OperationMode`, `Timers`), stable + idempotent across repeat
browses (no churn); the device-host folder (`10.201.31.5:8193`) was collapsed as designed.
- Sample reads, all **Good** (`Status 0x0`) with fresh/advancing source times:
`FOCAS/Identity/SeriesNumber=G431`, `CncType=31`, `AxisCount=7`,
`FOCAS/Axes/X/AbsolutePosition=2801574` (live), `FOCAS/OperationMode/ModeText=TJOG`.
- Authored `parts-count`/`parts-required` still Good — no regression.
Rollback point retained at `E:\ApiInstall\OtOpcUa_bak-20260626111416`.
@@ -0,0 +1,180 @@
# OtOpcUa equipment-tag data-plane — why live FOCAS values don't surface (investigation plan)
**Date:** 2026-06-25
**Companion to:** [`2026-06-25-focas-pdu-v3-implementation-plan.md`](2026-06-25-focas-pdu-v3-implementation-plan.md) (the FOCAS PDU-v3 driver work — DONE + deployed) and [`../deployments/wonder-app-vd03-makino-z-34184.md`](../deployments/wonder-app-vd03-makino-z-34184.md).
**Goal:** make the deployed Makino tags (`parts-count`/`parts-required` = `MACRO:3901/3902`) actually carry live values over OPC UA, and make the FOCAS FixedTree (Identity/Axes/Timers/…) appear in the served address space.
## The symptom (precise, observed 2026-06-25 after the v3 deploy)
Against `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa` (Security None, anonymous, DisableLogin):
- `read` AND a 30 s `subscribe` of `ns=2;s=EQ-3686c0272279/parts-count` (and `/parts-required`) return **`0x80320000` Bad_WaitingForInitialData** — never a value.
- A recursive browse of the whole served tree shows ONLY `machining → makino → z-34184 → {parts-count, parts-required}` — **no FixedTree nodes** (Identity/Axes/Timers/Program/…).
- Unchanged by host-restart, a full AdminUI re-deploy (`12e0d528`, Sealed/In-sync), and a driver `Restart`.
- A ~15 s box-side watch saw **no 250 ms-cadence TCP connection** to `10.201.31.5:8193` — only the occasional probe connect.
## What is RULED OUT (do not re-investigate)
- **The FOCAS PDU-v3 driver.** Proven working: the rewritten `FocasDriverProbe` does a real initiate + `cnc_statinfo` and the AdminUI shows **DRIVER STATUS: HEALTHY, last success Ns ago**; and on the dev box every surface (sysinfo / axes / dynamic / macros / timer / PMC / servo / alarms) reads correctly through the same `WireFocasClient`. The wire path talks v3 to the Makino.
- **Deployment / Akka roles.** Re-deploy sealed with no "task canceled"; the node has admin+driver roles; the address space (the 2 UNS tags) is served.
- **Reachability.** TCP 8193 reachable from the box; OPC 4840 reachable from the dev box.
So the problem is the OtOpcUa **data plane**: the driver's *control/probe plane* runs, but its *data plane* (the FixedTree poll loop output + the equipment-tag value source) is not reaching the served address space. This was present last session too — it was masked by the v3 reject; with v3 now working it stands alone.
## Leading hypotheses (to confirm/refute, not assume)
- **H1 — FixedTree bootstrap is stuck/throwing.** `FocasDriver.FixedTreeLoopAsync` is started at `FocasDriver.cs:160` inside `InitializeAsync` (which demonstrably ran — the probe loop at `:137` is alive). Its bootstrap `while (state.FixedTreeCache is null)` loop (~`:650`) calls `GetSysInfoAsync`/`GetAxisNamesAsync`/optional probes; if one throws it retries every 2 s and the cache never populates → no FixedTree nodes, no poll. The brief 2 s-spaced connects could explain "no steady 250 ms connection."
- **H2 — FixedTree nodes aren't projected under the Equipment model.** Galaxy/FOCAS are now *standard Equipment-kind drivers*; the served tree is the UNS/Equipment projection (configured tags only). Driver `DiscoverAsync` FixedTree auto-nodes may only live in a raw-driver namespace that the UNS browse doesn't show — i.e. FixedTree-under-Equipment may be unsupported-by-design, not a bug.
- **H3 — equipment-tag value source never primed.** A point-read returns the server's cached value; with no active poll/subscription seeding it, that's `WaitingForInitialData`. But a 30 s `subscribe` ALSO stayed bad — so either the monitored-item → `FocasDriver.ReadAsync` wiring isn't firing, or `ReadAsync(MACRO:3901)` returns no value on the box. (Analogy: the ScadaBridge DCL seed-ordering / static-tag false-bad class of bug — see that memory.)
- **H4 — a data-plane gate.** A role/active-node gate (cf. "data-plane roles need `Security:Ldap:GroupToRole` mapped or write/ack is inert") could suppress the read/poll on this single fused node under DisableLogin.
## ⚠️ Observability is the bottleneck
There are currently **no OtOpcUa app logs on the box** (Windows eventlog shows only NSSM start/stop; Serilog has no file sink in this deployment — cf. the MxGateway windev note). **Getting runtime logs is the single highest-leverage step** and gates H1/H3/H4.
---
## Phase 1 — Offline code trace of the data plane (do FIRST; free, no box/CNC)
Trace, in the repo on the dev box, the two paths end-to-end. This may reveal the root cause with zero box access.
1. **Equipment-tag read path:** from an OPC UA `Read`/`CreateMonitoredItem` on `ns=2;s=EQ-…/parts-count` → the server's address-space node → the "FullName→NodeId router" → `FocasDriver.ReadAsync`. Find: who invokes the driver read, whether point-reads vs subscriptions differ, and where/whether a value is cached vs `WaitingForInitialData` is returned. Entry points: `FocasDriver.cs:247` (resolver-produced equipment tags), `FocasDriver.ReadAsync`, and the server-side poll-group / monitored-item sampler (grep `PollGroup`, `MonitoredItem`, the Equipment namespace value provider).
2. **FixedTree path:** `FocasDriver.FixedTreeLoopAsync` (`:644`) → `state.FixedTreeCache` → `TryReadFixedTree` → how those nodes are added to the address space. Determine whether FixedTree nodes are emitted into the **Equipment/UNS projection** at all, or only a raw-driver namespace (resolves H2). Grep `DiscoverAsync`, `FixedTreeReference`, and the address-space builder for the Equipment kind.
3. **Driver lifecycle/gates:** confirm what conditions start/suppress the data poll vs the probe loop (active-node, role, enabled flags) — `FocasDriver.cs:87–181`, the cluster/active-node gate.
- **Output:** a written root-cause hypothesis (which of H1–H4) + the specific code site, OR a precise list of what only box logs can disambiguate.
### ✅ Phase 1 FINDINGS (2026-06-25, offline subagent code-trace — DONE)
| Hypothesis | Verdict | Confidence |
|---|---|---|
| **H2** — FixedTree nodes not projected under Equipment model | **CONFIRMED root cause of symptom #2 — by-design** | proven from code |
| **H3** — equipment-tag value source never primed | **CONFIRMED mechanism of symptom #1**; routing code is self-consistent → break is **runtime**, not static | mechanism proven; exact break needs box logs |
| **H4** — role/active-node data-plane gate | **REFUTED** for the read/subscribe path | proven from code |
| **H1** — FixedTree bootstrap stuck/throwing | best fit for symptom #3 (no 250 ms connect), but **moot** for #2 and **not** the cause of #1 | strongly-suggested; needs logs |
| **H5 (new)** — FOCAS tag with blank `deviceHostAddress` → `BadNodeIdUnknown` | **latent trap** — yields a *Bad* value, not WaitingForInitialData, so it's the *next* failure, not the current symptom | proven from code |
**Why symptom #2 (no FixedTree nodes):** FixedTree nodes are emitted **only** by `FocasDriver.DiscoverAsync` (`FocasDriver.cs:391-494`, gated on `FixedTreeCache`), and `DiscoverAsync` is **never invoked** by the deployed equipment-kind address-space build. Production materializes the served tree **purely from Config-DB Tag rows** (`EquipmentNodeWalker.cs:24-25,161-165`; `OpcUaPublishActor.RebuildAddressSpace` → `MaterialiseEquipmentTags(composition)` at `OpcUaPublishActor.cs:326-337`). No runtime caller of `IDriver.DiscoverAsync` exists. ⇒ **By-design gap, not a bug.** Surfacing FixedTree under the Equipment tree is a *feature* (author explicit Tag rows per signal, or extend the composition pipeline to merge `ITagDiscovery` output — a substantial `AddressSpaceComposer`/`AddressSpaceApplier` change).
**Why symptom #1 (`0x80320000` even under a 30 s subscribe):** equipment-tag node value is **push-only**. `0x80320000` is the materialization **seed** (`OtOpcUaNodeManager.EnsureVariable:1400-1402`); there is **no `OnReadValue`/server-side sampler** for equipment tags, so both Read and MonitoredItem serve the cached node value. The seed is overwritten **only** by `ForwardToMux → WriteValue` (`OtOpcUaNodeManager.cs:261-281`), which writes even Bad/null. `StatusFromQuality` maps Bad → `0x80000000`, **not** `0x80320000`. ⇒ A persistent `0x80320000` **proves no `AttributeValueUpdate` ever landed** for those two NodeIds. The routing chain (`DriverHostActor.PushDesiredSubscriptions` → `_nodeIdByDriverRef` → `DriverInstanceActor.ResubscribeDesired` → `PollGroupEngine` forceRaise → `OnDataChangeForward` → `ForwardToMux`) is **provably self-consistent for FOCAS** (`FocasTagConfigModel` writes no top-level `FullName` → `ExtractTagFullName` returns the raw JSON blob → `FocasEquipmentTagParser.TryParse` resolves it; subscribed ref == published ref == routing-map key == NodeId, all from the same `t.FullName`). So the static mapping is **refuted**; the break is **runtime**: either the FOCAS child was never subscribed those refs, or `_nodeIdByDriverRef` lacked the `(DriverInstanceId, FullName)` key at publish time — most likely a **`DriverInstanceId` attribution mismatch** between the equipment Tag rows and the spawned `DriverInstance`.
**What only box logs (or a ConfigDb snapshot read) can disambiguate for symptom #1 — capture these on the driver-role node after a deploy:**
1. `DriverHostActor` info *"SubscribeBulk pushed {Refs} references across {Drivers} driver(s)"* (`DriverHostActor.cs:1058`) — `Refs<2` ⇒ equipment Tag `DriverInstanceId` ≠ spawned FOCAS child id (attribution mismatch).
2. `DriverInstanceActor` info *"subscribed to {Count} refs"* (`DriverInstanceActor.cs:571`) — absent/`0` ⇒ `_desiredRefs` empty or never reached `Connected`.
3. `DriverHostActor` debug *"no equipment-tag NodeId for ({Driver},{Ref}) — value dropped"* (`DriverHostActor.cs:550`) — present ⇒ `_nodeIdByDriverRef` key miss.
4. `FocasDriver` debug *"FOCAS fixed-tree bootstrap failed … retrying"* every ~2 s (`FocasDriver.cs:686`) ⇒ confirms H1 (`cnc_sysinfo`/`cnc_rdaxisname` failing on the Makino). Needs Debug level + logger-constructed wire client.
5. A driver-side read of `MACRO:3901` (live Makino): `BadNodeIdUnknown` ⇒ H5 blank-`deviceHostAddress` trap; Good value ⇒ wire path fine, gap is purely the OPC-UA equipment subscription wiring.
**Cheaper-than-Phase-2 lead:** the prime suspect (Tag-row `DriverInstanceId` vs `DriverInstance` row mismatch) is checkable by reading the **deployed ConfigDb snapshot** (`OtOpcUaConfig` on `wonder-sql-vd03:1433`) — no service restart, no log-sink change. Worth doing before/instead of the Phase 2 log-sink change.
**Suggested minimal-fix shapes (NOT implemented):**
- Symptom #1: depends on which log line/row fires — if attribution miss, it's a **config/snapshot re-bind** (no code). If a real wiring gap, localize first.
- H5 latent: in `FocasDriver.ReadAsync`/`WriteAsync` device lookup (`:289`,`:346`), when `DeviceHostAddress` is blank and `_devices.Count==1`, resolve to the sole device instead of `BadNodeIdUnknown` (or make the AdminUI field mandatory).
- Symptom #2 (H2): no fix — by-design; surfacing FixedTree is a feature decision.
### ✅ ConfigDb snapshot read (2026-06-25, symptom #1 path — chosen over box logs)
Read live `OtOpcUaConfig` on `wonder-sql-vd03` (query run on-box so the SQL password never left the box). Ground truth:
- **Equipment** `EQ-3686c0272279` (`z-34184`): `DriverInstanceId=focas-z-34184`, `DeviceId=NULL`, `Enabled=True`, `UnsLineId=makino`.
- **Tags** `parts-count`/`parts-required`: both `DriverInstanceId=focas-z-34184`, `DeviceId=NULL`, `DataType=Double`, `AccessLevel=Read`, `FolderPath=NULL`, **`PollGroupId=NULL`**, `TagConfig={"deviceHostAddress":"10.201.31.5:8193","address":"MACRO:390{1,2}","dataType":"Float64"}`.
- **DriverInstance**: one row `focas-z-34184` (`DriverType=Focas`, `Enabled=True`, ns `dev-equipment`, cluster `DEV`).
- **Device** table: **0 rows** — NORMAL: `FocasDriver` builds `_devices` from `_options.Devices` (DriverConfig JSON) not the Device table (`FocasDriver.cs:97`).
**Refuted by this read:** prime-suspect `DriverInstanceId` attribution mismatch (matches exactly) and H5 blank-`deviceHostAddress` (present). The deployed config is **clean**. ⇒ symptom #1 is a pure value-flow-plumbing break. New live leads: **`PollGroupId=NULL`** on both tags (is a poll group required to subscribe/poll?) and the **resolver-registration** path (equipment-tag refs are "resolver-produced, not seeded at `InitializeAsync`" per `FocasDriver.cs:247` — does poll-time `TryResolve` of the JSON-blob ref ever succeed?). A second offline subagent trace of DriverHostActor↔DriverInstanceActor↔PollGroupEngine↔resolver is running to pin the exact broken link.
### ✅ FixedTree feature (symptom #2) — BUILT 2026-06-26 (architecture reality below)
Mapped the composition pipeline. Two address-space paths exist: (1) **Equipment/UNS projection** `AddressSpaceComposer.Compose` (config entities only) → `AddressSpaceApplier.MaterialiseEquipmentTags` → the served `ns=2` tree where `EQ-…` lives; (2) **raw-driver namespace** `GenericDriverNodeManager.BuildAddressSpaceAsync` → `driver.DiscoverAsync(IAddressSpaceBuilder)`. **Path 2 is DEAD: `BuildAddressSpaceAsync` has no runtime caller and `OpcUaApplicationHost.PopulateAddressSpaces` (its referenced caller) no longer exists.** Even `GalaxyDriver.DiscoverAsync` (`:588`) is reachable only via that dead path — Galaxy surfaces its hierarchy by being **authored as config equipment/tags**, not via discovery. ⇒ In the current Equipment-kind model **every served node is config-driven; `ITagDiscovery`/`DiscoverAsync` is legacy/dead for serving.** So "build the FixedTree feature" is NOT re-wiring an existing path — it's a **new dynamic-node-injection capability** into the Equipment projection, and it must solve a **timing problem**: composition runs at deploy/apply (before the driver connects), but FixedTree data only exists after the driver's async `FixedTreeCache` bootstrap. The far cheaper alternative that yields the same visible result is to **author FixedTree signals as config Tag rows** (each bound to a FOCAS fixed-tree reference) — same mechanism every other equipment tag uses. (The user chose to **build the dynamic feature** over the config-rows alternative.)
**✅ BUILT (2026-06-26).** Implemented as a generic **post-connect `ITagDiscovery` injection pipeline**: when a driver reaches `Connected`, `DriverInstanceActor` runs bounded re-discovery into a capturing `IAddressSpaceBuilder` and ships `DiscoveredNodesReady` to `DriverHostActor`, which maps the nodes under the equipment (`EQ-…/FOCAS/…`, read-only), extends the `_nodeIdByDriverRef` routing map, and tells `OpcUaPublishActor` to incrementally materialise them — reusing the existing materialize→subscribe→poll→push pipeline (no full rebuild). Survives redeploys (re-applied at the tail of `PushDesiredSubscriptions`) and restarts (re-discovered on reconnect). Design: [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md); implementation plan (11 bite-sized tasks, all green): [`2026-06-26-otopcua-fixedtree-equipment-injection.md`](2026-06-26-otopcua-fixedtree-equipment-injection.md). **Offline-complete** on branch `feat/focas-fixedtree-equipment-injection` (solution build 0 errors / 0 warnings; Runtime.Tests 312, OpcUaServer.Tests 304, FOCAS 247 + an end-to-end injection+value-flow test, all green). The review chain caught + fixed three real defects (a `DriverDataType→OPC-UA-type` string mismatch, a `Server.ReportEvent`-under-lock deadlock, and a `ConfigureAwait(false)` off-actor-context crash for async drivers). **Live wonder validation pending** (deploy the current host + browse `ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber`, `…/FOCAS/Axes/X/AbsolutePosition`).
### 🎯 ROOT CAUSE — symptom #1 (CONFIRMED, 2026-06-25, 2nd subagent trace + code verify)
**The FOCAS poll read hangs forever because (1) all wire I/O for a device shares one socket with NO serialization, and (2) the steady-state read has NO timeout.**
- **Unsynchronized shared socket:** `FocasDriver.EnsureConnectedAsync` (`:1101-1128`) returns the single `device.Client` with no I/O mutex. `DeviceState.ProbeLock` (`:1172`) only synchronizes probes with each other. Four independent loops issue wire ops on that same socket concurrently: the equipment poll (`PollGroupEngine`, 1 s — `SubscriptionPublishingInterval` const `DriverHostActor.cs:58`), the FixedTree loop (250 ms / 2 s bootstrap), the probe (5 s), the recycle loop. FOCAS/2 is strict request-response on one socket → concurrent unsynchronized reads interleave; one reader consumes another's response PDU and the victim read blocks waiting for bytes that never come.
- **No read timeout:** the poll calls `FocasDriver.ReadAsync` (`:295-308`) → `WireFocasClient.ReadAsync` (`:417`) → `ReadMacroAsync` **without** the optional `timeout` → `FocasWireClient.CreateCallTimeout` (`:843-848`) skips `CancelAfter` when `timeout` is null → `ReadExpectedPduAsync` awaits the socket under a token that never fires. The blocked first `forceRaise` poll (`PollGroupEngine.cs:119`) never completes → `onChange` never fires → the `BadWaitingForInitialData` materialization seed (`OtOpcUaNodeManager.cs:1400`) is never overwritten → permanent `0x80320000`. (Connect `:1119` and probe `:604-610` ARE bounded — only the steady-state read isn't.)
**Why it works on the dev box but not deployed:** the dev-box harness/CLI does ONE read at a time with no FixedTree loop running → no socket collision → macros read fine (proven in the v3 work). Deployed, the FixedTree loop races the equipment poll on one socket → collision → hung read.
**Why both tags fail together:** they're in one driver's single 1 s subscription batch; `PollOnceAsync` reads them sequentially and hangs on the first, so neither reaches `onChange`.
**Ruled out:** `PollGroupId=NULL` is a red herring — verified at `ConfigComposer.cs:38` (snapshots all tags, no poll-group filter), `AddressSpaceComposer.Compose` (never reads `PollGroupId`), and `DriverHostActor.PushDesiredSubscriptions:965-973` (groups by `DriverInstanceId`, constant 1 s interval, no poll-group keying). Routing-key mismatch ruled out: subscribe ref == routing key == `onChange` ref (all from the same `FullName`). Not-subscribed ruled out by elimination: a *served* node implies it's in the same `ParseComposition` artifact used for subscribe; and every *deterministic* `ReadAsync` outcome pushes a specific Bad code (`BadNodeIdUnknown`/`BadCommunicationError`/`BadNotSupported`), none of which is `0x80320000` — only a hung (never-returning) read leaves the seed intact.
**PROPOSED FIX (two parts, FOCAS-driver-only, single managed DLL, no migration/proto change — same low-blast-radius deploy path as the v3 work):**
1. **Serialize per-device wire I/O** — an async gate (`SemaphoreSlim`) on `DeviceState` held around each `EnsureConnected + wire op` so the equipment poll, FixedTree loop, probe, and recycle never collide on the one socket. *This is what makes values actually read Good.* MUST be paired with #2 (a lock around an unbounded read would deadlock all I/O).
2. **Bound every steady-state wire read/write** with `_options.Timeout` (mirror the probe's linked CTS at `:604-610`, or thread `timeout` through `WireFocasClient.ReadAsync` → `ReadMacroAsync`). Converts any stall into a recoverable `BadCommunicationError` push (overwrites the seed, downgrades health → observable + self-healing) instead of permanent silence.
**Confidence:** the two structural gaps are *proven from code*; that their interaction is the active trigger is *strongly-suggested* (works single-threaded on dev box, hangs deployed; fits every observation). Definitive live confirmation = the fix makes the tags leave `0x80320000` (Good, or recoverable BadComm if the macro read genuinely fails). **This is a debugging fix headed for a production CNC node — implement + unit-test (serialization + read-timeout) locally, then deploy to wonder and verify, per the diagnose-before-deploy discipline.**
### Implementation + deploy status (2026-06-26)
- **Fix implemented** on branch `fix/focas-poll-io-serialization` (off `feat/focas-pdu-v3`): new `SynchronizedFocasClient` decorator (per-device `SemaphoreSlim` gate + per-call `_options.Timeout`) wired into `FocasDriver.EnsureConnectedAsync`; `ReadAsync`/`WriteAsync` now map a per-call timeout (OCE while caller token live) → `BadCommunicationError` instead of rethrowing. **243 FOCAS tests green** (8 new in `FocasIoSerializationTests.cs` + 1 sibling in `FocasReadWriteTests`), full solution builds 0 errors.
- **Live baseline re-confirmed** via OtOpcUa CLI from the Mac: both tags `[80320000]`.
- **Deployed** the Release DLL to wonder (single managed-DLL swap; v3 DLL 222208 B → fix DLL 228864 B; backup `E:\ApiInstall\OtOpcUa\_focasbak-pre-iofix-20260626T041913.dll`; service Running, OPC 4840 listening).
- **BLOCKER for verification:** after the swap+restart the FOCAS driver is **not connecting at all** (zero TCP to `10.201.31.5:8193` at 150 s uptime — not even a probe connect), so tags still `[80320000]`.
- **Deploy API enabled** (user-approved): added `Security__DeployApiKey` (64-char generated, value on-box only) to the `OtOpcUaHost` service `Environment` REG_MULTI_SZ + restart; env backed up to `_envbak-pre-deploykey-20260626T043001.txt`. Headless `POST http://localhost:9000/api/deployments` (key read from registry on-box) returned **HTTP 202 Accepted**, sealed deployment `ef384b04…`.
- **Tags STILL `[80320000]`** after the sealed deploy. **Root of the live blocker (from box logs):** the service writes Serilog to `C:\Windows\System32\logs\otopcua-<date>.log` (relative-path-from-service-CWD bug — pending.md D2). Today's log shows the **admin/publish side only**: `Phase7Applier: hierarchy materialised (areas=1,lines=1,equipment=1)` + `equipment tags materialised (tags=2)` at each restart (04:19, 04:30) — which is why the 2 tags are served with the `0x80320000` seed. But across the **entire** day's log there are **ZERO** driver-side lines: `DriverHost`=0, `SubscribeBulk`=0, `subscribed to`=0, `Focas`=0, `GenericDriver`=0 (the 13 `DriverInstance` hits are all SQL `FROM [DriverInstance]`). **⇒ the DriverHostActor / driver-role side is not spawning the FOCAS driver at all on this node today**, so there is no poll, no value push, permanent seed. Health: `/healthz` Healthy, Akka member **Up**, `admin-leader` **Active**; roles env intact (`Cluster__Roles__0=admin`, `__1=driver`, seed=self:4053); `DOTNET_ENVIRONMENT=Production`. The admin half is fully alive; only the driver half is silent.
- **This is a SEPARATE blocker from the I/O fix** (which is correct, unit-tested, and deployed — but cannot be exercised until the driver actually polls).
### Driver-host activation diagnosis (2026-06-26, Debug-logging pass + code reads)
- **Debug Serilog enabled** (env `Serilog__MinimumLevel__Default=Debug` + EF/Microsoft/System→Warning; env bak `_envbak-pre-debug-*.txt`). Revealed: **NO exceptions / load errors anywhere** (my fix DLL is NOT the cause), but also **ZERO Akka lines** in Serilog (`Akka`/`Member`/`akka.tcp`/`singleton` = 0).
- **`DriverHostActor` logs via `Context.GetLogger()` (Akka `ILoggingAdapter`), and there is NO `Akka.Logger.Serilog` bridge anywhere in the app** (verified across `*.cs`/`*.csproj`/HOCON). So Akka actor logs go to the default console logger → **discarded for a Windows service → invisible in BOTH Production AND Development.** `appsettings.Development.json` only changes Serilog min-levels (+ `Ldap:DevStubMode=true`); it adds no Akka bridge. ⇒ **`DOTNET_ENVIRONMENT=Development` would NOT surface the driver host** (held off — futile + flips DevStubMode on prod).
- **The `DriverHostActor` IS instantiated** (unconditionally, same `system.ActorOf` block as the working `OpcUaPublishActor` at `Runtime/ServiceCollectionExtensions.cs:~232`). So it runs — it just isn't producing a connected FOCAS driver, and logs via Akka so the reason is invisible.
- **STRONG HYPOTHESIS (explains everything):** on bootstrap the `DriverHostActor` *recovers* its persisted state (its own logs name `entering Steady` / `recovered Applied state at rev {Rev}`), but a process restart has already killed the live `DriverInstanceActor`s + their FOCAS sockets — and **recovery does NOT re-instantiate them**. Drivers re-spawn only on a deployment dispatch whose **revision changes**. This config is now **stable** (`revisionHash 924b59097eba…` is identical across `0c2db588`/`ef384b04`/`a2a84646` — every `POST /api/deployments` returns 202 but with the SAME rev), so repeated deploys are no-ops for the driver host → after any restart there are **no live drivers** and nothing re-spawns them → zero Makino TCP → tags frozen at the `0x80320000` materialization seed. The **prior session got HEALTHY because the config was being *authored*** (each deploy had a NEW rev → forced a spawn); once authoring stopped, a restart leaves the driver side dark. ⇒ likely the real reason "host-restart didn't change it" in the original symptom.
- **Implication:** to exercise the I/O fix live, force a driver re-spawn by bumping the revision (e.g. toggle the FOCAS DriverInstance `Enabled` off→on, or any benign config edit, then deploy) — reversible. Separately, "restart should re-spawn applied drivers" + "Akka logs should bridge to Serilog" are real OtOpcUa robustness/observability gaps (the latter = the observability bottleneck this plan flagged).
- **Box env mutations still in place (to revert when done):** `Security__DeployApiKey` (deploy API), `Serilog__MinimumLevel__*` (Debug). Backups: `_envbak-pre-deploykey-*`, `_envbak-pre-debug-*`.
### Akka→Serilog observability attempt (2026-06-26) — code done, prod deploy BLOCKED by self-contained layout
- **Code change (correct, builds 0 errors):** added `Akka.Logger.Serilog` 1.5.60 (deps all satisfied by existing pins — Akka 1.5.62 / Serilog 4.3.1) to `Directory.Packages.props` + the Cluster csproj, and `loggers=["Akka.Logger.Serilog.SerilogLogger, …"]` + `loglevel=DEBUG` + `logger-startup-timeout=30s` to the embedded `Resources/akka.conf`. This bridges the DriverHostActor's Akka `ILoggingAdapter` into the Serilog file sink.
- **Prod deploy FAILED TWICE (both auto-rolled-back; service is UP + 4840 listening the whole time after each):**
1. Cluster.dll + Akka.Logger.Serilog.dll swap → crash `FileNotFoundException: Could not load 'Akka.Logger.Serilog 1.5.60'` from `Serilog.Settings.Configuration.ConfigurationReader.LoadConfigurationAssemblies` (the deployed `OtOpcUa.Host.deps.json` didn't list the new assembly; Serilog's `*.Serilog` config-assembly scan then fails fatally).
2. Added the updated `OtOpcUa.Host.deps.json` → crash **"Could not resolve CoreCLR path."** ⇒ the box deployment is **SELF-CONTAINED** (bundles its own runtime), so a `dotnet build` (framework-dependent) deps.json breaks the apphost's runtime resolution.
- **Conclusion:** adding a NEW assembly to this box requires a **full self-contained publish-overlay** (match the box layout; preserve `appsettings*`/`data\`), NOT a DLL/deps.json swap. That's a heavy/risky prod op for what is *diagnostic* observability of the (separate, pre-existing) driver-host re-spawn issue.
- **Prod state now (verified healthy):** FOCAS **I/O-fix DLL still deployed** (228864 B — untouched by these rollbacks), Cluster.dll + deps.json restored to baseline, `Akka.Logger.Serilog.dll` renamed `.disabled`, service Running + OPC 4840 listening. Deploy-API key + Debug-Serilog env still set.
- **RECOMMENDED PIVOT:** the cheap, no-prod-change way to validate the I/O fix is to **force a driver re-spawn via a benign config rev-bump** (the leading hypothesis); and the *proper* driver-host re-spawn fix + the Akka-observability are best developed in a **local docker-dev repro** (safe, full logging, one clean publish-overlay deploy at the end) rather than iterated on the production CNC node. **(User chose: move proper fix to local docker-dev.)**
### 🎯 ROOT CAUSE of the driver-not-spawning — STALE DEPLOYED BINARY (CONFIRMED 2026-06-26)
The DriverHostActor's bootstrap-recovery **already re-spawns drivers in the current source**: `BootstrapRecover` `case Applied:` logs `"recovered Applied state at rev"` and calls `RestoreApplied()` (`DriverHostActor.cs:449-457`), which does `ReconcileDrivers` (re-spawn) + `RebuildAddressSpace` + `PushDesiredSubscriptions` (`:910-918`) — comment: *"the in-memory driver children + OPC UA address space were lost on restart … re-spawn … instead of waiting for a config change (whose identical-config revision would no-op)."* Added by `b1b3f3ff` (2026-06-06) + `397f9b78` (2026-06-07); both ancestors of `feat/focas-pdu-v3`; covered by existing `Restore_on_bootstrap_*` tests.
- **The DEPLOYED wonder `Runtime.dll` is the June-16 base install** (`mtime 2026-06-16 13:05:46`; only the FOCAS *driver* DLL was ever swapped). Binary string scan: `RestoreApplied`/`ReconcileDrivers` method names PRESENT, but the bootstrap-restore log strings `"recovered Applied state at rev"` + `"restored served state for applied deployment"` are **ABSENT** ⇒ the deployed binary **predates b1b3f3ff** and does **NOT** restore served state / re-spawn drivers on bootstrap. **That is exactly why, on this stable-config box, a restart leaves the driver side dark** (and why only config-authoring re-deploys ever lit it up).
- **⇒ No new driver-host fix is needed — it already exists in source.** The proper remedy = **deploy the current Host** (full self-contained publish-overlay onto `E:\ApiInstall\OtOpcUa`, preserving `appsettings*`/`data\`), which in one shot delivers: (1) `RestoreApplied`-on-bootstrap (drivers re-spawn after restart), (2) the FOCAS I/O serialization fix (symptom #1), (3) the Akka→Serilog bridge (observability). The earlier DLL-swap crashes were purely the self-contained-layout mismatch — a full publish-overlay is internally consistent and avoids them.
- **PLAN:** confirm the current Host build boots cleanly + re-spawns drivers on restart in **docker-dev** (safe), then do ONE self-contained publish-overlay to wonder + verify tags leave `0x80320000`.
### ✅ docker-dev confirmation (2026-06-26) — current build re-spawns on bootstrap + Akka bridge works
Built the current source into the docker-dev image (`otopcua-host:dev`), booted `central-1` (fused admin+driver, like wonder). Results:
- **Boots cleanly** with `Akka.Logger.Serilog` (the prod DLL-swap crashes were purely the self-contained-layout mismatch, now confirmed).
- **Akka→Serilog bridge works** — but needed TWO fixes beyond the package: (1) Akka.Hosting owns logger setup so HOCON `akka.loggers` is ignored → wire via `ConfigureLoggers(setup => { setup.LogLevel=DebugLevel; setup.ClearLoggers(); setup.AddLogger<SerilogLogger>(); })` in `WithOtOpcUaClusterBootstrap`; (2) `AddZbSerilog` registers Serilog as the MEL provider but does NOT set the static `Serilog.Log.Logger` (which `Akka.Logger.Serilog` writes to, AND which the Program.cs startup banner uses) → set `Serilog.Log.Logger = app.Services.GetRequiredService<Serilog.ILogger>()` in Program.cs right after `Build()`. With both, the startup banner + full Akka cluster/DriverHost logs now emit. *(The `Log.Logger`-unset gap is a latent bug in the shared `AddZbSerilog` lib affecting all 3 apps' static-`Log` calls — worth a follow-up there.)*
- **`RestoreApplied` re-spawns drivers on bootstrap — CONFIRMED live:** `DriverHost central-1: recovered Applied state at rev …` → `spawned GalaxyMxGateway/Modbus/OpcUaClient driver` (×3) → `SubscribeBulk pushed 5 references across 3 driver(s)` → `restored served state for applied deployment … on bootstrap` → `DriverInstance …: subscribed to N refs`. These are the exact strings ABSENT from the stale wonder binary. ⇒ deploying the current Host fixes the wonder driver-not-spawning blocker.
### Deploy plan (self-contained publish-overlay → wonder)
- Local: `dotnet publish …Host.csproj -c Release -r win-x64 --self-contained true -p:PublishSingleFile=false` (self-contained = brings its own runtime; no version-match concern with the box).
- Zip + SFTP to `win64`. Box overlay (self-healing): full app-dir backup → robocopy publish over `E:\ApiInstall\OtOpcUa` **EXCLUDING `pki\` (OPC server cert) + `appsettings*.json`** → start → verify (4840 + tags) → AUTO-ROLLBACK from backup if unhealthy. Env (deploy key, Debug logging) is registry-side, untouched by the file overlay.
- Then revert the Debug-Serilog env (now superseded; verbose) and verify `parts-count`/`parts-required` read Good (or recoverable BadComm), and that the FixedTree/driver values flow.
### ✅✅ SYMPTOM #1 FIXED + LIVE-VALIDATED ON WONDER (2026-06-26)
After the self-contained overlay (current Host) + two light single-DLL FOCAS swaps, the validation revealed — and each fix peeled back — a **cascade of latent FOCAS-config-vs-driver mismatches** the stale binary had masked. Final state, all live on wonder:
- `DriverHost …: recovered Applied state … → spawned Focas driver focas-z-34184 (stub=false) → DriverInstance focas-z-34184: connected → subscribed to 2 refs` → **two ESTABLISHED TCP sessions to `10.201.31.5:8193`**.
- **`read ns=2;s=EQ-3686c0272279/parts-count` → Value 0, Status `0x00000000` (Good)**; `parts-required` → Good. (Value 0 is correct on the idle machine — status, not magnitude.) The original `0x80320000` is gone.
**The complete fix chain (all on `fix/focas-poll-io-serialization`, deployed):**
1. **FOCAS I/O serialization + read timeout** (`SynchronizedFocasClient`) — the original diagnosed root cause: prevents the poll read hanging on the shared socket.
2. **`RestoreApplied`-on-bootstrap** — already in source since `b1b3f3ff`; the wonder fix was deploying the current Host over the stale June-16 binary so the driver re-spawns on restart.
3. **Akka→Serilog bridge** (`ConfigureLoggers().AddLogger<SerilogLogger>()` + set static `Serilog.Log.Logger` in Program.cs) — made the driver-host actor observable; this is what surfaced the next two issues.
4. **`FlexibleStringConverter`** on the FOCAS config `Series` — the AdminUI persists the enum as a number (`"series":6`); the factory now tolerates number-or-string instead of throwing → stub.
5. **Scheme-less host tolerance** in `FocasHostAddress.TryParse` — the AdminUI persists `hostAddress` as a bare `ip:port`; `TryParse` now accepts it (canonical `focas://` unchanged) instead of failing init.
- FOCAS test suite **247 green**; each fix carries a regression test.
- **Follow-up (product quality):** the AdminUI authors FOCAS configs (`series` as number, `hostAddress` without `focas://`) that the driver only now tolerates — the AdminUI↔driver config-format mismatch is worth reconciling at the source. Also: the shared `AddZbSerilog` not setting static `Serilog.Log.Logger` is a latent gap across all 3 apps. The FixedTree-under-Equipment feature (task #14) is now **BUILT** (offline-complete; see the 2026-06-26 design + implementation-plan docs above) — live wonder validation pending.
## Phase 2 — Get OtOpcUa runtime logs on wonder
Make the Host emit driver-level logs so the data plane is observable. Options (least invasive first): point the service at a Serilog file sink via config/env, or temporarily run with `DOTNET_ENVIRONMENT=Development` (file sink + dev errors — cf. MxGateway note), or add a console capture. Preserve `appsettings*`/`data\`; restore the env after. Then read: did `InitializeAsync` start the FixedTree loop, does the bootstrap throw (and on which call), is `ReadAsync` invoked for the equipment tags, what does it return.
- **Access:** servecli `:2222`, key `~/.ssh/servecli_wonder`, `scratchpad/wonder-ps.sh` (base64 PS over the cmd PTY), SFTP root `C:\Users\dohertj2\Desktop\win64`.
- **Output:** the actual runtime behavior of the data poll + equipment read on the box.
## Phase 3 — Local reproduction in docker-dev (isolate FOCAS-specific vs general)
Reproduce off the production box: in the docker-dev OtOpcUa, configure an equipment tag bound to a driver and check whether values flow at all. Use a driver with an easy local source (e.g. Modbus against a local sim, or the FOCAS wire client against a v1 responder if one can be stood up). If equipment-tag values flow for another driver locally but not FOCAS → FOCAS-specific; if they don't flow for any → a general equipment-projection/data-plane gap. A local repro gives a full-logging debug loop.
- **Output:** scope (FOCAS-only vs general) + a local failing case to fix against.
## Phase 4 — Root cause + minimal fix
From Phases 1–3, fix the smallest thing that makes the equipment tag carry a value and (if H2 is not by-design) the FixedTree surface. Likely shapes: a swallowed bootstrap exception; a seed/poll-group wiring gap; an Equipment-projection that should include FixedTree/driver auto-nodes; or a data-plane gate that needs config on the wonder node.
## Phase 5 — Validate
- Local (docker-dev) green where reproduced; unit/integration tests for the fixed path.
- Live: re-deploy to wonder, then via the OtOpcUa CLI confirm `parts-count`/`parts-required` read **Good** (value 0 is correct on this idle machine — assert status, not magnitude) and, if applicable, FixedTree Identity/Axes nodes appear with live values. The live Makino helps for FixedTree but is NOT blocking for the value-flow plumbing (macro values are 0 regardless), so this is lower time-pressure than the v3 capture was.
## Phase 6 — Docs + commit
Update this plan's status + the deployment doc; commit on a branch (separate from `feat/focas-pdu-v3` if the fix is OtOpcUa-core rather than FOCAS-driver). Push per the repo flow when asked.
---
## Notes
- **Execution approach:** this is a *debugging* investigation (unknown root cause) — diagnose before fixing; reproduce before claiming a fix; change the smallest thing. Don't deploy a guessed fix to the production CNC node.
- **Key node IDs / endpoints:** equipment `ns=2;s=EQ-3686c0272279`; tags `…/parts-count` (`MACRO:3901`), `…/parts-required` (`MACRO:3902`); OPC `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa`; AdminUI `http://wonder-app-vd03.zmr.zimmer.com:9000` (DisableLogin); deploy = AdminUI `/deployments` → "Deploy current configuration" (Blazor→Akka; no headless API).
- **Reusable tools:** `scratchpad/focas-status/` (live IFocasClient harness), `scratchpad/wonder-ps.sh`, `scratchpad/deploy-focas-v3.ps1`; the OtOpcUa CLI `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` (`read`/`subscribe`/`browse --recursive`).
@@ -0,0 +1,110 @@
# FixedTree → Equipment injection — RESUME / work-left handoff
**Date:** 2026-06-26
**Purpose:** survive a context compaction; let a fresh session continue without re-deriving state.
---
## TL;DR
The **FixedTree-under-Equipment dynamic-injection feature is BUILT, offline-complete, AND
✅ LIVE-VALIDATED on wonder (2026-06-26)** — 11 tasks, all reviewed, full offline suite green, final
integration review = ready to merge, and the real OPC injection confirmed on `wonder-app-vd03` (57 nodes
grafted under `EQ-3686c0272279`, all reading Good live values). It lives on a **local, unpushed** branch.
The only substantive thing left is the user's decision on push/PR/merge (§1). A few documented non-blocking
follow-ups remain (§3).
## Git state (exact)
- **Branch:** `feat/focas-fixedtree-equipment-injection` (in the main working dir `/Users/dohertj2/Desktop/OtOpcUa`, NOT a worktree).
- **Base:** branched off `fix/focas-poll-io-serialization` (the symptom-#1 data-plane fix — itself ahead of `master`, pushed to gitea with its own open PR, NOT merged). So this feature **stacks on an unmerged branch**.
- **Commits:** 14, range `da55c69`..`37cac5de` (10 task commits + 4 review-fix/docs commits). All **local — nothing pushed.**
- **User decision (2026-06-26):** finishing-a-development-branch → **"Keep as-is."** Do NOT push/merge/discard without an explicit new go-ahead. Standing rule: **commit/push only when asked.**
- **Untouched pre-existing working-tree edits** (leave alone; never stage): `CLAUDE.md`, `docker-dev/docker-compose.yml`, `pending.md`, `stillpending.md`, `docs/plans/2026-06-19-followups-batch.md.tasks.json`.
- This RESUME doc itself is currently **uncommitted** (a working artifact).
## What the feature does
Generic post-connect `ITagDiscovery` injection (NOT FOCAS-special-cased). On driver Connect:
`DriverInstanceActor` runs bounded re-discovery (Timers single-tick, generation-guarded, stop-on-stable +
attempt cap, re-kicks on reconnect) into a capturing `IAddressSpaceBuilder` → ships `DiscoveredNodesReady` →
`DriverHostActor` resolves the equipment via authored `EquipmentTags`, maps the nodes under
`EQ-…/FOCAS/…` (read-only; single device-host folder collapsed) via `DiscoveredNodeMapper`, extends
`_nodeIdByDriverRef`, caches the plan, Tells `OpcUaPublishActor.MaterialiseDiscoveredNodes` →
`AddressSpaceApplier` → sink `EnsureFolder`/`EnsureVariable` + `RaiseNodesAddedModelChange` (NodeAdded), and
re-sends `SetDesiredSubscriptions(authored FixedTree refs)` so values flow through the existing
poll→push path. Survives redeploys (re-applied at the tail of `PushDesiredSubscriptions` from the cache)
and restarts (re-discovered on reconnect).
## Verification (offline) — all green as of 2026-06-26
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → **0 errors, 0 warnings** (TreatWarningsAsErrors on).
- `dotnet test … --filter "FullyQualifiedName~Runtime.Tests"` → **312 passed**.
- `dotnet test … --filter "FullyQualifiedName~OpcUaServer.Tests"` → **304 passed**.
- `dotnet test … --filter "FullyQualifiedName~FOCAS"` → **324 passed, 10 skipped** (the skips are live-wire integration tests needing the physical CNC — expected).
- Final integration review: **ready to merge** (3 non-blocking Minors — see Follow-ups).
- Known env limitation (not a failure): the net48 `Driver.Historian.Wonderware.Tests` can't run its testhost on macOS — run the **filtered** suites above, not a full-solution `dotnet test`.
## Key files / anchors
- Design: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (status = Implemented; has the follow-ups).
- Plan + task journal: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection.md` (+ `.md.tasks.json`, all tasks completed).
- Investigation plan (symptom #2 marked BUILT): `docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md`.
- Deployment doc (FixedTree section added): `docs/deployments/wonder-app-vd03-makino-z-34184.md`.
- New code:
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs`, `CapturingAddressSpaceBuilder.cs`, `DiscoveredNodeMapper.cs`
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs` (DTOs)
- modified: `DriverInstanceActor.cs`, `DriverHostActor.cs`, `OpcUaPublishActor.cs`, `AddressSpaceApplier.cs`, `OtOpcUaNodeManager.cs`, `IOpcUaAddressSpaceSink.cs` (+ `SdkAddressSpaceSink.cs`, `DeferredAddressSpaceSink.cs`)
- tests: `tests/Server/…Runtime.Tests/Drivers/{CapturingAddressSpaceBuilderTests,DiscoveredNodeMapperTests,DriverInstanceActorDiscoveryTests,DriverHostActorDiscoveryTests,DiscoveryInjectionEndToEndTests}.cs`, `…OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs`, edits to `AddressSpaceApplierTests.cs`/`OpcUaPublishActorTests.cs`.
- Memory: `…/memory/wonder-otopcua-focas-and-akka-roles.md` (RESUME-ANCHOR bullet updated to record this feature; read it for the broader wonder/FOCAS context + box-access recipe).
## WORK LEFT (prioritized)
### 1. Decide the git endgame (user-gated)
Pick one, only on explicit user go-ahead:
- **Push + PR** — `git push -u origin feat/focas-fixedtree-equipment-injection`; PR base is `fix/focas-poll-io-serialization` (stacked) or `master` (will show both features' commits). gitea repo: `lmxopcua`.
- **Merge locally** into `fix/focas-poll-io-serialization` (folds both features onto one branch/PR).
- Keep waiting until after live validation (current state).
### 2. Live wonder validation — ✅ DONE 2026-06-26
**Validated live on `wonder-app-vd03`.** Built a full self-contained Host overlay from this branch @
`37cac5de`, deployed to `E:\ApiInstall\OtOpcUa` (stop → backup `E:\ApiInstall\OtOpcUa_bak-20260626111416`
→ robocopy overlay preserving `appsettings*.json` + `pki\` → restart). Baseline before deploy: only
`parts-count`/`parts-required` under `EQ-3686c0272279`. After deploy + FOCAS reconnect: the host log
recorded `injected 57 discovered node(s) … under EQ-3686c0272279` / `materialised … (folders=14, vars=57)`,
no exceptions. CLI browse showed the full `FOCAS/` subtree (Identity/Axes X-Y-Z-B-C-AA+Actual/Spindle/
Program/OperationMode/Timers), idempotent across repeats, device-host folder collapsed. Sample reads all
Good: `Identity/SeriesNumber=G431`, `CncType=31`, `AxisCount=7`, `Axes/X/AbsolutePosition=2801574` (live),
`OperationMode/ModeText=TJOG`; authored tags still Good (no regression). `/healthz` 200 Healthy throughout.
Result recorded in `docs/deployments/wonder-app-vd03-makino-z-34184.md`. **The substantive remaining work
is now the git endgame (§1) only.** Original recipe retained below for reference:
The offline e2e asserts the recording-sink contract, NOT the real `OtOpcUaNodeManager` seed→overwrite at
the OPC node layer. Live validation closes that gap. Recipe (mirrors the symptom-#1 deploy):
1. Build the current Host self-contained: `dotnet publish src/…/ZB.MOM.WW.OtOpcUa.Host…csproj -c Release -r win-x64 --self-contained true -p:PublishSingleFile=false`. **Must be a full self-contained publish-overlay, NOT a DLL swap** — the box is self-contained (DLL swaps crashed: FileNotFound / "Could not resolve CoreCLR path"). Note: deploying the current Host already happened for symptom #1; if the box is at the symptom-#1 build, this feature's DLLs (Runtime + OpcUaServer + Commons + the new Runtime/Drivers files) must be included in the overlay — so a fresh full overlay from THIS branch is the safe path.
2. Box access: servecli `:2222`, key `~/.ssh/servecli_wonder`, user `dohertj2`; drive via `scratchpad/wonder-ps.sh` (base64 PS over cmd PTY); SFTP root `C:\Users\dohertj2\Desktop\win64`. Service `OtOpcUaHost`. Overlay onto `E:\ApiInstall\OtOpcUa` **preserving `pki\` + `appsettings*.json` + `data\`**; back up first; auto-rollback if unhealthy.
3. Restart `OtOpcUaHost`; confirm member Up w/ ADMIN+DRIVER (roles env already set), `/healthz` Healthy, OPC `:4840` listening.
4. The FOCAS driver connects → ~0–2 s later FixedTree populates → injection fires. Validate via the OtOpcUa CLI (`src/Client/…Client.CLI`) against `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa` (Security None, anonymous):
- `browse --recursive` → expect a `FOCAS` subfolder under `ns=2;s=EQ-3686c0272279` with `Identity/`, `Axes/`, etc.
- `read ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber` → expect Good (a real string).
- `read ns=2;s=EQ-3686c0272279/FOCAS/Axes/X/AbsolutePosition` → expect Good (value may be 0 on idle machine — assert STATUS, not magnitude).
- The authored `parts-count`/`parts-required` should remain Good (symptom #1 fix).
5. If a value reads Bad, the symptom-#1 self-healing applies (recoverable `BadCommunicationError`, observable in Serilog at `C:\Windows\System32\logs\otopcua-<date>.log`). The Akka→Serilog bridge (from symptom #1) makes `DriverHost`/`DriverInstance`/discovery logs visible.
### 3. Non-blocking follow-ups
**✅ ALL FIXEDTREE FOLLOW-UPS (A–E) IMPLEMENTED 2026-06-26** — design+plan
`2026-06-26-otopcua-fixedtree-followups{-design,}.md`; 16 commits `c2c368dc`..`0074f37a` on this branch
(every task spec+code reviewed; offline suites green). Resolved:
- ✅ Config-unchanged rebind now re-triggers discovery (`TriggerRediscovery`) — follow-up C.
- ✅ Multi-device-per-driver implemented via `EquipmentNode.DeviceHost` partition; ≥1-authored-tag requirement lifted (driver-binding resolution) — follow-up E (projection-only, no migration / no artifact wire change).
- ✅ Per-(re)connect re-discovery policy-gated (`ITagDiscovery.RediscoverPolicy` UntilStable/Once/Never; synchronous drivers → Once) — follow-up B.
- ✅ Double `SetDesiredSubscriptions` per redeploy de-duped (one send per driver) — follow-up D.
- ✅ Per-pass `DiscoverAsync` timeout made injectable — follow-up A.
**Still open (out of scope for the FixedTree follow-ups — separate cross-cutting work):**
- Cross-cutting (from symptom #1, all 3 apps): shared `AddZbSerilog` doesn't set the static `Serilog.Log.Logger`; AdminUI persists FOCAS config in formats (series-as-number, scheme-less host) the driver only now tolerates — reconcile at the AdminUI source.
## Context that's easy to lose
- 3 real defects were caught + fixed by the review chain during the build: `DriverDataType.ToString()` ≠ OPC type string (`Float64` → `"Double"`); `Server.ReportEvent` under the node `Lock` (deadlock); `ConfigureAwait(false)` in the discovery handler (off-actor-context crash for async drivers like Galaxy sharing the node). All have regression tests.
- The plan's Task-3 instruction "keep ReportEvent inside lock" was itself a defect; the plan doc was corrected.
- The execution used subagent-driven-development (fresh implementer per task + spec/code reviews; high-risk tasks got Opus reviews, serial). Single-writer discipline was enforced (no concurrent `dotnet` builds → no obj/bin or git-index races).
@@ -0,0 +1,205 @@
# OtOpcUa — dynamic injection of driver-discovered FixedTree nodes into the Equipment projection (design)
**Date:** 2026-06-26
**Status:** ✅ Implemented (2026-06-26) — 11 tasks, offline-complete on branch `feat/focas-fixedtree-equipment-injection` (solution build 0 errors / 0 warnings; Runtime.Tests 312, OpcUaServer.Tests 304, FOCAS 247 + an end-to-end injection+value-flow test, all green). Live wonder validation pending.
**Follow-ups surfaced during the review chain — ✅ ALL RESOLVED 2026-06-26** (design
[`2026-06-26-otopcua-fixedtree-followups-design.md`](2026-06-26-otopcua-fixedtree-followups-design.md),
plan [`2026-06-26-otopcua-fixedtree-followups.md`](2026-06-26-otopcua-fixedtree-followups.md);
16 commits `c2c368dc`..`0074f37a` on this branch, every task spec+code reviewed; full offline suite green):
- ✅ Config-unchanged driver→equipment **rebind** now **re-triggers discovery** (follow-up C): the redeploy re-inject tail drops the stale plan AND `Tell`s the driver child a new `DriverInstanceActor.TriggerRediscovery` (a discovery action — not lifecycle control — idempotent, child no-ops if not Connected), so the FixedTree re-grafts under the new equipment on the next pass instead of waiting for the next natural reconnect.
- ✅ **Multi-device-per-driver** mapping **implemented** (follow-up E): `EquipmentNode` now carries `DriverInstanceId`/`DeviceId`/`DeviceHost` (projection-only — the columns + the `Devices` array were already in the artifact, no DB migration / no wire change), so equipment resolves via the driver binding **without** authored tags (≥1-tag requirement lifted), and a driver bound to multiple devices partitions its discovered tree by normalized device-host folder, grafting each device's subtree under the equipment whose `DeviceHost` matches (unmatched hosts warn-skip, never mis-graft).
- ✅ Per-(re)connect re-discovery is now **policy-gated** (follow-up B): `ITagDiscovery.RediscoverPolicy` (`UntilStable`/`Once`/`Never`, default `UntilStable`) — FOCAS stays `UntilStable` (its FixedTree cache fills asynchronously after connect); the synchronous-discovery drivers (OpcUaClient/TwinCAT/AbCip/AbLegacy/Modbus/S7/Galaxy) are `Once`, dropping the wasteful 15× retry. The hardcoded 30 s per-pass discovery timeout is now injectable too (follow-up A).
- ✅ The OPC-node-layer seed→serve gap (recording-sink-only e2e) was closed by the **live wonder deploy** of the base feature (validated 2026-06-26; see the deployment record).
**Companion to:** [`2026-06-25-otopcua-equipment-dataplane-investigation.md`](2026-06-25-otopcua-equipment-dataplane-investigation.md) (symptom #1 — live FOCAS values — FIXED + deployed; this design addresses **symptom #2**).
**Base branch:** `fix/focas-poll-io-serialization` (this feature builds on the now-deployed driver-host bootstrap re-spawn + FOCAS I/O fixes; that branch is ahead of `master` and not yet merged).
---
## Problem
Deployed FOCAS equipment serves only its **authored** config tags (`parts-count`/`parts-required`). The driver's
**FixedTree** (Identity / Axes / Spindle / Program / Timers — the auto-discovered CNC structure) **never appears** under
the served Equipment/UNS address space.
**Root cause (confirmed in the investigation, H2):** the served Equipment tree is built **purely from Config-DB entities**
(`AddressSpaceComposer.Compose` → `AddressSpaceApplier` → node manager). The only code that emits FixedTree nodes is
`ITagDiscovery.DiscoverAsync` (each driver implements it), reachable **only** through `GenericDriverNodeManager.BuildAddressSpaceAsync`
— which has **no runtime caller** (its referenced host method `OpcUaApplicationHost.PopulateAddressSpaces` no longer exists).
So `DiscoverAsync`/`ITagDiscovery` is **dead for serving**: every served node is config-driven, and nothing surfaces a
driver's discovered hierarchy.
Surfacing FixedTree under the Equipment node is therefore a **new dynamic-node-injection capability**, and it must solve a
**timing problem**: composition runs at deploy/apply time (before the driver connects), but the FixedTree shape
(axis count, spindle presence, which sections exist) is **capability-discovered ~0–2 s after the driver connects**
(`FocasDriver` populates `state.FixedTreeCache` in its bootstrap loop).
## Goal
After a driver connects, dynamically graft its discovered FixedTree nodes into the served Equipment projection under a
driver-named subfolder, e.g.:
```
ns=2;s=EQ-3686c0272279 (equipment "z-34184")
├── parts-count (authored config tag — unchanged)
├── parts-required (authored config tag — unchanged)
└── FOCAS (NEW — driver-named discovered subfolder)
├── Identity/{SeriesNumber, Version, MaxAxes, CncType, MtType, AxisCount}
├── Axes/{<axis>/{AbsolutePosition, MachinePosition, RelativePosition, DistanceToGo}, FeedRate/Actual, SpindleSpeed/Actual}
├── Spindle/{<name>/{Load, MaxRpm}} (capability-gated)
├── Program/{Name, ONumber, Number, MainNumber, Sequence, BlockCount} (capability-gated)
├── OperationMode/{Mode, ModeText} (capability-gated)
└── Timers/{PowerOnSeconds, OperatingSeconds, CuttingSeconds, CycleSeconds} (capability-gated)
```
Read-only value nodes carrying live values (e.g. `EQ-…/FOCAS/Axes/X/AbsolutePosition` reads Good).
## Decisions (locked with the user 2026-06-26)
| Decision | Choice |
|---|---|
| Driver scope | **Generic** — keyed off the shared `ITagDiscovery` interface (FOCAS, Galaxy, Modbus all implement it). FOCAS is the first/test consumer; others get it for free. **Zero per-driver code changes.** |
| Tree placement | **Under a driver-named subfolder** — `EQ-…/FOCAS/…` (collision-safe vs. authored tags; self-describing). |
| Device-host folder | **Collapse** the single device-host level → `EQ-…/FOCAS/Identity/…` (not `EQ-…/FOCAS/10.201.31.5:8193/Identity/…`), valid because today's deployment is strictly 1:1 driver↔equipment↔device. |
| Model-change notification | **Emit `GeneralModelChangeEvent`** after a runtime add so already-connected OPC UA clients can refresh their browse. |
| Multi-device-per-driver | **Deferred** at base-feature time; ✅ **implemented as follow-up E** (2026-06-26) — `EquipmentNode.DeviceHost` partition. |
| Discovered alarms | **Out of scope** — this feature surfaces value nodes only; alarms continue to come via the config path. |
| Writable discovered nodes | **Out of scope** — FixedTree is read-only CNC state. |
## Approach (chosen): runtime post-connect injection via the actor pipeline
Treat discovered FixedTree nodes as **"synthetic equipment tags" injected at runtime**, reusing the existing
materialize → subscribe → poll → push pipeline end-to-end. Only three new pieces; **no driver changes** (each driver's
existing `DiscoverAsync` is reused verbatim via a capturing builder).
**Rejected alternatives:**
- *Composition-time pre-projection* — can't author the right nodes before the driver discovers capabilities; defeats the purpose.
- *Resurrect `GenericDriverNodeManager` as a 2nd namespace (ns=3)* — puts FixedTree in a separate tree (not **under** the equipment node), and that namespace's value-routing is also dead; more dead code to revive, wrong location.
- *Cheap baseline: author a Config-DB Tag row per FixedTree signal* — no new code, but static (can't adapt to per-CNC capabilities) and per-signal × per-machine manual authoring. User chose to build the dynamic feature instead.
## Components
### 1. `CapturingAddressSpaceBuilder` (new — runtime)
An `IAddressSpaceBuilder` implementation that **records** the streamed tree instead of creating OPC UA nodes. After a
driver's `DiscoverAsync(builder)` returns, it exposes a flat `IReadOnlyList<DiscoveredNode>`:
```
DiscoveredNode {
IReadOnlyList<string> FolderPathSegments, // e.g. ["FOCAS", "<deviceHost>", "Identity"]
string BrowseName, string DisplayName,
string FullReference, // == DriverAttributeInfo.FullName (the driver ref + routing key)
DriverDataType DataType, bool IsArray, uint? ArrayDim,
bool Writable, bool IsHistorized
}
```
- `Folder(browse, display)` returns a child capturing scope; `Variable(...)` records a node and returns an
`IVariableHandle` whose `FullReference` is `DriverAttributeInfo.FullName`.
- `MarkAsAlarmCondition(...)` returns a **no-op** sink; `AddProperty(...)` is **ignored** — value nodes only.
### 2. `DriverInstanceActor` — post-connect discovery (bounded retry)
On entering `Connected`, kick a bounded re-discovery:
1. Run `DiscoverAsync(capturingBuilder)` against the live `IDriver` it owns.
2. `Tell` the parent `DriverHostActor` a new message `DiscoveredNodesReady(DriverInstanceId, IReadOnlyList<DiscoveredNode>)`.
3. Because FOCAS suppresses FixedTree until `FixedTreeCache` populates (~0–2 s), **retry** every ~2 s up to a cap
(~30 s) **or until the captured set stops growing**, then stop. `DiscoverAsync` reads the in-memory cache (no extra
wire I/O), so retries are cheap. Re-runs on every reconnect (downstream is idempotent).
*(Drivers whose discovery is ready immediately — e.g. Galaxy/Modbus — satisfy this on the first attempt.)*
### 3. `DriverHostActor` — injection handler
On `DiscoveredNodesReady(id, nodes)`:
1. Find the equipment bound to the driver instance: `composition.EquipmentNodes` where `DriverInstanceId == id`.
- 0 matches → log Info, skip. >1 match → log Warning, skip (multi-device follow-up).
2. **Dedup** discovered `FullReference`s against authored `EquipmentTags` for that driver (never double-create
`parts-count`, etc.).
3. Map each remaining node to a NodeId `EQ-…/FOCAS/<collapsed-path>/<name>` via `EquipmentNodeIds.Variable(...)`
(collapse the single device-host folder level).
4. **Cache** the mapped result in `_discoveredByDriver[id]` (survives redeploys — see Lifecycle).
5. Update `_nodeIdByDriverRef[(id, FullReference)]` for each.
6. `Tell` `OpcUaPublishActor` a new `MaterialiseDiscoveredNodes(equipmentId, "FOCAS", nodes)`.
7. Merge the new refs into the driver's desired set and re-`Tell`
`DriverInstanceActor.SetDesiredSubscriptions(union, interval, alarmRefs)` — the existing **live path** immediately
re-subscribes (the actor self-`Tell`s `Subscribe` when already `Connected`).
### 4. `OpcUaPublishActor` / node manager — incremental materialize
New message `MaterialiseDiscoveredNodes(equipmentId, driverSubfolder, nodes)`:
- Idempotent `EnsureFolder` / `EnsureVariable` calls (the node manager already supports incremental add under `Lock`
via `AddChild` + `AddPredefinedNode`; `EnsureVariable` early-returns if the node exists).
- Variables materialize **read-only** (no `OnWriteValue`).
- After adding, emit a `GeneralModelChangeEvent` so connected clients can refresh their browse (the full-rebuild path
does not emit one; runtime adds should).
## Data flow (value path — fully reused)
```
SetDesiredSubscriptions(union) → DriverInstanceActor subscribes the FixedTree refs
→ PollGroupEngine polls each ref via FocasDriver.ReadAsync
→ TryReadFixedTree (cache lookup, NO extra wire I/O)
→ onChange → AttributeValuePublished(FullReference)
→ DriverHostActor.ForwardToMux
→ _nodeIdByDriverRef[(driverId, ref)] → AttributeValueUpdate(nodeId, value, quality, ts)
→ OtOpcUaNodeManager writes the node value
```
The routing key is **consistent by construction**: the capturing builder records `handle.FullReference`, which is exactly
the ref the driver publishes (`AttributeValuePublished.FullReference`) and the ref `TryReadFixedTree` matches
(`reference.StartsWith(state.Options.HostAddress + "/")`).
## Lifecycle / re-injection robustness (the timing problem, solved)
- **First connect:** driver connects → ~0–2 s later `FixedTreeCache` populates → bounded re-discovery catches it → inject.
- **Redeploy with a structural `RebuildAddressSpace`:** the full teardown wipes injected nodes and `PushDesiredSubscriptions`
rebuilds `_nodeIdByDriverRef` from authored tags only. **Fix:** after every `PushDesiredSubscriptions`, `DriverHostActor`
**re-applies its cached `_discoveredByDriver`** (re-materialize + re-map + re-merge refs) — so FixedTree survives
redeploys without re-querying the driver.
- **Process restart:** `_discoveredByDriver` is lost, but `RestoreApplied` re-spawns drivers → each reconnects →
post-connect re-discovery re-injects (same ~0–2 s delay). Consistent with the symptom-#1 restore behavior already
deployed.
- **Idempotent throughout:** `EnsureFolder`/`EnsureVariable` early-return if present; `_nodeIdByDriverRef` is set-based;
`SetDesiredSubscriptions` is idempotent.
## Error handling
- Discovery throws / driver not ready → bounded retry, then give up quietly (Info); authored tags unaffected.
- No equipment bound to the driver instance, or ambiguous (multi-equipment) → Warning, skip injection.
- A FixedTree ref that fails to read at poll time → flows the same recoverable `BadCommunicationError` push as any
equipment tag (the symptom-#1 fix) — observable, not silent.
## Testing
- **Unit:**
- `CapturingAddressSpaceBuilder` records the tree + refs from a fake `ITagDiscovery` (folders, nested variables,
no-op alarm sink, ignored properties).
- Injector mapping: discovered nodes → `EQ-…/FOCAS/…` NodeIds; dedup against authored tags; device-host-folder collapse.
- `DriverInstanceActor` bounded post-connect re-discovery (set becomes non-empty on the Nth attempt; stops on cap / no-growth).
- `DriverHostActor` `DiscoveredNodesReady` handling + re-inject-after-`PushDesiredSubscriptions`.
- Read-only materialization (no write callback).
- **Integration (docker-dev):** a fake `ITagDiscovery` driver exposing a *delayed* discovery set → assert nodes appear
under the equipment and carry values; verify survival across a redeploy + a process restart.
- **Live (wonder, following the symptom-#1 pattern):** deploy the current Host + this change, browse
`EQ-3686c0272279/FOCAS/Identity/SeriesNumber` and `…/Axes/X/AbsolutePosition`, confirm Good values. The live deploy is
**not** blocking for the build (macro/axes values may be 0 on the idle machine — assert status, not magnitude); confirm
the live-deploy step with the user at execution time.
## Scope / non-goals
- **In:** read-only value nodes for any `ITagDiscovery` driver; 1:1 driver↔equipment; survives redeploy/restart; generic
mechanism with FOCAS as the first consumer.
- **Out (documented follow-ups):** discovered **alarms** injection; multi-device-per-driver-instance mapping; writable
discovered nodes.
## Touched code (anticipated)
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` — `DiscoveredNodesReady` handler, `_discoveredByDriver`
cache, re-inject after `PushDesiredSubscriptions`, desired-set merge.
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` — post-connect bounded re-discovery + new message.
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs` — `MaterialiseDiscoveredNodes` receive.
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — `GeneralModelChangeEvent` emit on runtime add (verify
existing helper).
- New: `CapturingAddressSpaceBuilder` + `DiscoveredNode` DTO (runtime), `EquipmentNodeIds` reuse for mapping.
- Tests under `tests/...Runtime.Tests` / `tests/...OpcUaServer.Tests` and a fake `ITagDiscovery` test double.
## Task tracking
Umbrella native task **#14** (FixedTree feature). Implementation tasks to be generated by writing-plans from this design.
@@ -0,0 +1,759 @@
# FixedTree → Equipment dynamic-injection Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
**Goal:** After an `ITagDiscovery` driver connects, dynamically graft its discovered FixedTree nodes into the served Equipment/UNS OPC UA address space under a driver-named subfolder (`EQ-…/FOCAS/…`), carrying live values — reusing the existing materialize → subscribe → poll → push pipeline.
**Architecture:** Treat discovered nodes as "synthetic equipment tags" injected at runtime. A capturing `IAddressSpaceBuilder` records each driver's `DiscoverAsync` output (zero driver changes); `DriverInstanceActor` runs discovery post-connect (bounded retry, since FOCAS's `FixedTreeCache` populates ~0–2 s after connect) and ships a `DiscoveredNodesReady` message; `DriverHostActor` maps the nodes under the equipment, extends `_nodeIdByDriverRef` + the desired-subscription set, and tells `OpcUaPublishActor` to incrementally materialize them (idempotent `EnsureFolder`/`EnsureVariable`, no full teardown), emitting a `GeneralModelChangeEvent`. Survives redeploys (re-applied after `PushDesiredSubscriptions`) and restarts (re-discovered on reconnect).
**Tech Stack:** .NET 10, Akka.NET (Akka.Hosting, Akka.TestKit.Xunit2), OPC UA (`OPCFoundation.NetStandard.Opc.Ua`), xUnit v2 + Shouldly.
**Design doc:** [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md). Base branch: `fix/focas-poll-io-serialization` (this builds on the deployed driver-host bootstrap re-spawn + FOCAS I/O fixes; not yet merged to `master`).
**Key code anchors (verified 2026-06-26):**
- `IAddressSpaceBuilder` / `IVariableHandle` / `DriverAttributeInfo` — `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
- Reference capturing builder (flat collector): `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Cli/Commands/BrowseCommand.cs:120` (`CollectingAddressSpaceBuilder`).
- NodeId scheme: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/OpcUa/EquipmentNodeIds.cs` (`Variable(equipmentId, folderPath, name)` → `{parent}/{name}`; `SubFolder` → `{equipmentId}/{folderPath}`).
- Materialize pattern: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs:248` (`MaterialiseEquipmentTags`) + `SafeEnsureFolder`/`SafeEnsureVariable`.
- Node manager: `OtOpcUaNodeManager.EnsureFolder` (`:1282`), `EnsureVariable` (`:1367`, seeds `BadWaitingForInitialData`), `BuildNodeShapeChangedEvent` (`:1525`, verb `DataTypeChanged` — model for a `NodeAdded` sibling).
- Publish actor receive + materialize calls: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs:217` (Receive block), `HandleRebuild` (`:275`).
- Driver value route: `DriverHostActor.ForwardToMux` (`:525`), `_nodeIdByDriverRef` built in `PushDesiredSubscriptions` (`:1019`, sends `SetDesiredSubscriptions` `:1052`), `ChildEntry` (`:203`), Receive blocks (`:482`, `:512`).
- Driver connect hook: `DriverInstanceActor` `_driver` field (`:110`), `Connected()` (`:317`), transition at `InitializeSucceeded` (`:278`); `SetDesiredSubscriptions` live re-subscribe path (`:340-353`).
- FOCAS discovery (reused verbatim): `FocasDriver.DiscoverAsync` (`:408`) emits `FOCAS/{deviceHost}/<section>/…`; FixedTree leaf `FullName` = `{deviceHost}/{path}`; suppresses FixedTree until `FixedTreeCache` set.
---
## Conventions for every task
- **TDD:** write the failing test first, run it (confirm the expected failure), implement minimally, run again (green), commit.
- **Build:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` from the repo root.
- **Run a single test class:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~<ClassName>"`.
- **Commits:** Conventional Commits, on `fix/focas-poll-io-serialization` (do NOT touch the pre-existing unrelated working-tree edits: `CLAUDE.md`, `docker-dev/docker-compose.yml`, `pending.md`, `stillpending.md`, `docs/plans/2026-06-19-followups-batch.md.tasks.json` — `git add` only this feature's files).
- **No new dependencies, no proto change, no EF migration.** All edits are within existing projects.
---
## Task 1: `DiscoveredNode` DTO + path-tracking `CapturingAddressSpaceBuilder`
**Classification:** standard
**Estimated implement time:** ~4 min
**Parallelizable with:** Task 3
**Files:**
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs`
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/CapturingAddressSpaceBuilder.cs`
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/CapturingAddressSpaceBuilderTests.cs`
Unlike the CLI's flat `CollectingAddressSpaceBuilder`, this one **tracks folder nesting** so each variable records its full path segments (e.g. `["FOCAS","10.201.31.5:8193","Identity"]` + browse `SeriesNumber`).
**Step 1: Write the failing test**
```csharp
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
[Trait("Category", "Unit")]
public sealed class CapturingAddressSpaceBuilderTests
{
    /// <summary>
    /// A variable added three folders deep must be captured once, carrying every folder browse
    /// name on the way down plus the attribute metadata it was declared with.
    /// </summary>
    [Fact]
    public void Records_nested_path_segments_full_reference_and_metadata()
    {
        var root = new CapturingAddressSpaceBuilder();
        var seriesNumberInfo = new DriverAttributeInfo(
            FullName: "10.0.0.5:8193/Identity/SeriesNumber",
            DriverDataType: DriverDataType.String, IsArray: false, ArrayDim: null,
            SecurityClass: SecurityClassification.ViewOnly, IsHistorized: false);

        root.Folder("FOCAS", "FOCAS")
            .Folder("10.0.0.5:8193", "cnc")
            .Folder("Identity", "Identity")
            .Variable("SeriesNumber", "SeriesNumber", seriesNumberInfo);

        // Child scopes feed the root's node list, so the root sees the nested variable.
        root.Nodes.Count.ShouldBe(1);
        var captured = root.Nodes[0];
        captured.FolderPathSegments.ShouldBe(new[] { "FOCAS", "10.0.0.5:8193", "Identity" });
        captured.BrowseName.ShouldBe("SeriesNumber");
        captured.FullReference.ShouldBe("10.0.0.5:8193/Identity/SeriesNumber");
        captured.DataType.ShouldBe(DriverDataType.String);
        captured.Writable.ShouldBeFalse(); // ViewOnly → read-only
    }

    /// <summary>
    /// Properties are dropped without throwing, and marking a variable as an alarm condition
    /// yields a usable (no-op) sink while the variable itself is still captured.
    /// </summary>
    [Fact]
    public void AddProperty_is_ignored_and_alarm_marking_is_a_noop_sink()
    {
        var root = new CapturingAddressSpaceBuilder();
        var focasFolder = root.Folder("FOCAS", "FOCAS");

        focasFolder.AddProperty("Manufacturer", DriverDataType.String, "FANUC"); // ignored, no throw

        var alarmInfo = new DriverAttributeInfo("ref", DriverDataType.Int32, false, null,
            SecurityClassification.ViewOnly, false, IsAlarm: true);
        var handle = focasFolder.Variable("V", "V", alarmInfo);
        var alarmSink = handle.MarkAsAlarmCondition(new AlarmConditionInfo("src", AlarmSeverity.Low, null));

        alarmSink.ShouldNotBeNull(); // no-op sink, alarms out of scope
        root.Nodes.Count.ShouldBe(1);
    }
}
```
**Step 2: Run to verify it fails** — `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~CapturingAddressSpaceBuilderTests"` → FAIL (types don't exist).
**Step 3: Implement `DiscoveredNode.cs`**
```csharp
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// A flattened variable captured from a driver's <see cref="ITagDiscovery.DiscoverAsync"/> stream
/// by <see cref="CapturingAddressSpaceBuilder"/>. Folder nesting is preserved in
/// <see cref="FolderPathSegments"/> so the injector can re-root the node under an equipment.
/// </summary>
/// <param name="FolderPathSegments">Browse names of the enclosing folders, outermost first
/// (e.g. <c>["FOCAS", "&lt;deviceHost&gt;", "Identity"]</c>); does not include the variable's own name.</param>
/// <param name="BrowseName">Browse name the driver gave the variable.</param>
/// <param name="DisplayName">Display name the driver gave the variable.</param>
/// <param name="FullReference">The driver's <c>DriverAttributeInfo.FullName</c> for the variable — the
/// driver reference used as the value-routing key.</param>
/// <param name="DataType">Driver-side data type of the variable.</param>
/// <param name="IsArray">Whether the driver declared the variable as an array.</param>
/// <param name="ArrayDim">Array dimension declared by the driver, or <c>null</c> when none was given.</param>
/// <param name="Writable">Whether the variable may be written; the capturing builder sets this to
/// <c>false</c> for <c>SecurityClassification.ViewOnly</c> attributes.</param>
/// <param name="IsHistorized">Whether the driver flagged the variable as historized.</param>
public sealed record DiscoveredNode(
IReadOnlyList<string> FolderPathSegments,
string BrowseName,
string DisplayName,
string FullReference,
DriverDataType DataType,
bool IsArray,
uint? ArrayDim,
bool Writable,
bool IsHistorized);
```
**Step 3b: Implement `CapturingAddressSpaceBuilder.cs`**
```csharp
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// A recording <see cref="IAddressSpaceBuilder"/>: rather than materialising OPC UA nodes it
/// collects every variable an <see cref="ITagDiscovery"/> driver streams, so the runtime can
/// later graft that hierarchy under an equipment node. Each folder scope remembers the path that
/// led to it, which is how a variable ends up with its complete
/// <see cref="DiscoveredNode.FolderPathSegments"/>.
/// <para>Only value nodes are kept: <see cref="AddProperty"/> does nothing and alarm marking hands
/// back a sink that discards transitions (discovered alarms are out of scope — alarms come via the
/// config path).</para>
/// <para>Intended for a single caller: the root builder and all of its folder scopes append to one
/// shared <see cref="List{T}"/>, with no synchronisation. Not thread-safe by design.</para>
/// </summary>
public sealed class CapturingAddressSpaceBuilder : IAddressSpaceBuilder
{
    // One list for the whole tree; every folder scope created from this builder appends to it.
    private readonly List<DiscoveredNode> _captured;

    // Folder browse names from the root down to (and including) this scope.
    private readonly IReadOnlyList<string> _segments;

    /// <summary>Creates a root builder with an empty path and nothing captured yet.</summary>
    public CapturingAddressSpaceBuilder() : this(nodes: [], path: []) { }

    private CapturingAddressSpaceBuilder(List<DiscoveredNode> nodes, IReadOnlyList<string> path)
    {
        _captured = nodes;
        _segments = path;
    }

    /// <summary>Every variable captured anywhere in the tree (the root and all folder scopes share it).</summary>
    public IReadOnlyList<DiscoveredNode> Nodes
    {
        get { return _captured; }
    }

    /// <summary>Opens a child scope one level deeper; the display name is not recorded.</summary>
    public IAddressSpaceBuilder Folder(string browseName, string displayName)
    {
        // A fresh list per child keeps sibling scopes from seeing each other's segments.
        IReadOnlyList<string> childSegments = [.. _segments, browseName];
        return new CapturingAddressSpaceBuilder(_captured, childSegments);
    }

    /// <summary>Records the variable under the current folder path and returns a handle for its reference.</summary>
    public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
    {
        var isWritable = attributeInfo.SecurityClass != SecurityClassification.ViewOnly;
        var node = new DiscoveredNode(
            _segments,
            browseName,
            displayName,
            attributeInfo.FullName,
            attributeInfo.DriverDataType,
            attributeInfo.IsArray,
            attributeInfo.ArrayDim,
            isWritable,
            attributeInfo.IsHistorized);

        _captured.Add(node);
        return new CapturedHandle(attributeInfo.FullName);
    }

    /// <summary>Intentionally empty: properties are metadata and are not captured.</summary>
    public void AddProperty(string browseName, DriverDataType dataType, object? value)
    {
        /* metadata only — ignored */
    }

    /// <summary>Handle that only carries the variable's driver reference.</summary>
    private sealed class CapturedHandle : IVariableHandle
    {
        private readonly string _fullReference;

        public CapturedHandle(string fullReference)
        {
            _fullReference = fullReference;
        }

        public string FullReference
        {
            get { return _fullReference; }
        }

        public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info)
        {
            return new DiscardingSink();
        }
    }

    /// <summary>Alarm sink that drops every transition.</summary>
    private sealed class DiscardingSink : IAlarmConditionSink
    {
        public void OnTransition(AlarmEventArgs args)
        {
        }
    }
}
```
**Step 4: Run to verify it passes** — same filter → PASS.
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs \
src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/CapturingAddressSpaceBuilder.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/CapturingAddressSpaceBuilderTests.cs
git commit -m "feat(otopcua): capturing address-space builder for driver discovery"
```
---
## Task 2: `DiscoveredNodeMapper` — map discovered nodes under an equipment
**Classification:** standard
**Estimated implement time:** ~5 min
**Parallelizable with:** Task 3
**Files:**
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs` (the `DiscoveredFolder`/`DiscoveredVariable` materialize DTOs — placed in OpcUaServer so both the applier and the Runtime mapper can reference them; Runtime already references OpcUaServer)
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs`
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveredNodeMapperTests.cs`
**Pure function** turning `IReadOnlyList<DiscoveredNode>` + an `equipmentId` + the driver's authored-tag refs into folders + variables (NodeIds under the equipment) + routing entries. Rules:
- **Device-folder collapse:** if every node shares an identical segment at index 1 (the single device folder under the driver root), drop index 1 → `EQ/FOCAS/Identity/…` rather than `EQ/FOCAS/<deviceHost>/Identity/…`. With ≥2 devices the segments differ → not collapsed (device level retained, degrades gracefully — multi-device equipment mapping itself is a deferred follow-up).
- **Dedup:** skip any node whose `FullReference` is in `authoredRefs` (already a Config-DB equipment tag for this driver — applies to drivers like Galaxy whose discovery refs equal the equipment-tag FullNames; for FOCAS the FixedTree refs never match authored refs, so all FixedTree nodes pass through).
- **NodeId:** `EquipmentNodeIds.Variable(equipmentId, folderPath, name)` where `folderPath` = collapsed segments joined by `/`. Folders deduped, each parented at its prefix.
- **DataType:** convert `DriverDataType` → the OPC-UA-builtin string `OtOpcUaNodeManager.EnsureVariable` expects. **Reuse the existing convention** — grep for how `EquipmentTagPlan.DataType` is produced from `DriverDataType` (e.g. a `DriverDataType.ToString()` / a mapping helper) and `OtOpcUaNodeManager.ResolveBuiltInDataType`; do NOT invent a new mapping. If a helper exists, call it; the switch below is a fallback to align if not.
- **Writable:** from `DiscoveredNode.Writable` (FixedTree is read-only).
**Step 1: Write the failing test**
```csharp
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
[Trait("Category", "Unit")]
public sealed class DiscoveredNodeMapperTests
{
    /// <summary>Builds a scalar, non-historized discovered node whose display name equals its browse name.</summary>
    private static DiscoveredNode Node(string[] path, string name, string fullRef,
        DriverDataType dt = DriverDataType.Float64, bool writable = false)
        => new(path, name, name, fullRef, dt, false, null, writable, false);

    /// <summary>
    /// With a single device folder, the device level is dropped and nodes land directly under
    /// <c>EQ-1/FOCAS/…</c>, with folders parented at their prefix and routing keyed by driver ref.
    /// </summary>
    [Fact]
    public void Maps_under_equipment_collapsing_single_device_folder()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
            Node(["FOCAS", "10.0.0.5:8193", "Axes", "X"], "AbsolutePosition", "10.0.0.5:8193/Axes/X/AbsolutePosition"),
        };

        // ISet<string> is not a collection-expression target type, so `[]` would not compile here.
        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string>());

        result.Variables.Select(v => v.NodeId).ShouldBe(new[]
        {
            "EQ-1/FOCAS/Identity/SeriesNumber",
            "EQ-1/FOCAS/Axes/X/AbsolutePosition",
        }, ignoreOrder: true);
        // folders: EQ-1/FOCAS, EQ-1/FOCAS/Identity, EQ-1/FOCAS/Axes, EQ-1/FOCAS/Axes/X
        result.Folders.Select(f => f.NodeId).ShouldContain("EQ-1/FOCAS/Axes/X");
        result.Folders.First(f => f.NodeId == "EQ-1/FOCAS/Axes/X").ParentNodeId.ShouldBe("EQ-1/FOCAS/Axes");
        // routing: driverRef → nodeId
        result.RoutingByRef["10.0.0.5:8193/Identity/SeriesNumber"].ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
        result.Variables.First(v => v.NodeId.EndsWith("SeriesNumber")).Writable.ShouldBeFalse();
    }

    /// <summary>A discovered node whose driver ref is already an authored tag must not be mapped again.</summary>
    [Fact]
    public void Dedups_authored_refs()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193"], "parts-count", "parts-count"), // authored
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
        };

        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string> { "parts-count" });

        result.Variables.ShouldHaveSingleItem();
        result.Variables[0].NodeId.ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
    }

    /// <summary>With two distinct device folders the device level is kept, so the subtrees stay separate.</summary>
    [Fact]
    public void Does_not_collapse_when_two_devices_present()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "a", DriverDataType.String),
            Node(["FOCAS", "10.0.0.6:8193", "Identity"], "SeriesNumber", "b", DriverDataType.String),
        };

        // ISet<string> is not a collection-expression target type, so `[]` would not compile here.
        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string>());

        result.Variables.Select(v => v.NodeId).ShouldBe(new[]
        {
            "EQ-1/FOCAS/10.0.0.5:8193/Identity/SeriesNumber",
            "EQ-1/FOCAS/10.0.0.6:8193/Identity/SeriesNumber",
        }, ignoreOrder: true);
    }
}
```
**Step 2: Run to verify it fails.**
**Step 3: Implement `DiscoveredInjection.cs` (DTOs)**
```csharp
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
/// <summary>A folder to ensure during discovered-node injection (NodeId + parent + display).</summary>
/// <param name="NodeId">NodeId of the folder to ensure.</param>
/// <param name="ParentNodeId">NodeId of the node the folder hangs under. Declared nullable —
/// NOTE(review): confirm how the materializer treats <c>null</c>; the mapper in this plan always supplies a parent.</param>
/// <param name="DisplayName">Display name for the folder.</param>
public sealed record DiscoveredFolder(string NodeId, string? ParentNodeId, string DisplayName);
/// <summary>A read-or-write variable to ensure during discovered-node injection.</summary>
/// <param name="NodeId">NodeId of the variable to ensure.</param>
/// <param name="ParentNodeId">NodeId of the folder the variable is placed under.</param>
/// <param name="DisplayName">Display name for the variable.</param>
/// <param name="DataType">Data type as a string — presumably the OPC UA built-in type name the node
/// manager resolves; verify against <c>OtOpcUaNodeManager.ResolveBuiltInDataType</c>.</param>
/// <param name="Writable">Whether the variable accepts writes.</param>
/// <param name="IsArray">Whether the variable is an array.</param>
/// <param name="ArrayLength">Array length when known, otherwise <c>null</c>.</param>
public sealed record DiscoveredVariable(
string NodeId, string ParentNodeId, string DisplayName, string DataType, bool Writable, bool IsArray, uint? ArrayLength);
```
**Step 3b: Implement `DiscoveredNodeMapper.cs`**
```csharp
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>The mapped result of grafting discovered nodes under an equipment.</summary>
/// <param name="Folders">Folders to ensure under the equipment.</param>
/// <param name="Variables">Variables to ensure under those folders.</param>
/// <param name="RoutingByRef">Lookup from a driver full reference to the equipment NodeId that serves it.</param>
public sealed record DiscoveredInjectionPlan(
IReadOnlyList<DiscoveredFolder> Folders,
IReadOnlyList<DiscoveredVariable> Variables,
IReadOnlyDictionary<string, string> RoutingByRef); // driver FullReference → equipment NodeId
/// <summary>
/// Pure mapper: re-roots a driver's captured discovery tree under an equipment node, deduping
/// authored Config-DB refs and collapsing the single device-host folder. See the design doc.
/// </summary>
public static class DiscoveredNodeMapper
{
    /// <summary>
    /// Maps captured discovery nodes to folders, variables and routing entries under an equipment.
    /// </summary>
    /// <param name="equipmentId">NodeId of the equipment the discovered tree is grafted under.</param>
    /// <param name="nodes">Variables captured from the driver's discovery stream.</param>
    /// <param name="authoredRefs">Driver references that already exist as authored equipment tags;
    /// any discovered node whose <see cref="DiscoveredNode.FullReference"/> is in this set is skipped.</param>
    /// <returns>
    /// The folders (one per distinct path prefix, each parented at its prefix), the variables, and a
    /// driver-reference → NodeId routing table. If two kept nodes share a full reference, both
    /// variables are emitted and the later one wins the routing entry.
    /// </returns>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public static DiscoveredInjectionPlan Map(
        string equipmentId, IReadOnlyList<DiscoveredNode> nodes, ISet<string> authoredRefs)
    {
        ArgumentNullException.ThrowIfNull(equipmentId);
        ArgumentNullException.ThrowIfNull(nodes);
        ArgumentNullException.ThrowIfNull(authoredRefs);

        var kept = nodes.Where(n => !authoredRefs.Contains(n.FullReference)).ToList();

        // Collapse a single shared device-folder level (index 1 under the driver root) when present.
        // With two or more distinct device folders the level is retained so subtrees do not merge.
        var collapseIndex1 = kept.Count > 0
            && kept.All(n => n.FolderPathSegments.Count >= 2)
            && kept.Select(n => n.FolderPathSegments[1]).Distinct(StringComparer.Ordinal).Count() == 1;

        static IReadOnlyList<string> Effective(IReadOnlyList<string> segs, bool collapse)
            => collapse ? [segs[0], .. segs.Skip(2)] : segs;

        var folders = new Dictionary<string, DiscoveredFolder>(StringComparer.Ordinal);
        var variables = new List<DiscoveredVariable>();
        var routing = new Dictionary<string, string>(StringComparer.Ordinal);

        foreach (var n in kept)
        {
            var segs = Effective(n.FolderPathSegments, collapseIndex1);

            // Ensure every prefix folder EQ/seg0, EQ/seg0/seg1, …
            for (var i = 0; i < segs.Count; i++)
            {
                var folderPath = string.Join('/', segs.Take(i + 1));
                var nodeId = EquipmentNodeIds.SubFolder(equipmentId, folderPath);
                if (folders.ContainsKey(nodeId)) continue;

                // The top-level driver folder hangs directly off the equipment node.
                var parent = i == 0 ? equipmentId : EquipmentNodeIds.SubFolder(equipmentId, string.Join('/', segs.Take(i)));
                folders[nodeId] = new DiscoveredFolder(nodeId, parent, segs[i]);
            }

            var varFolderPath = string.Join('/', segs);
            var varNodeId = EquipmentNodeIds.Variable(equipmentId, varFolderPath, n.BrowseName);
            var varParent = EquipmentNodeIds.SubFolder(equipmentId, varFolderPath);
            variables.Add(new DiscoveredVariable(
                varNodeId, varParent, n.DisplayName, ToBuiltinTypeString(n.DataType), n.Writable, n.IsArray, n.ArrayDim));
            routing[n.FullReference] = varNodeId;
        }

        return new DiscoveredInjectionPlan(folders.Values.ToList(), variables, routing);
    }

    // DriverDataType.ToString() is NOT the OPC UA built-in type string for every member: the review
    // chain caught Float64, whose built-in name is "Double". Members not listed here still fall back
    // to the enum name — NOTE(review): verify each remaining member against the existing
    // EquipmentTagPlan / OtOpcUaNodeManager.ResolveBuiltInDataType convention, and prefer the
    // shared mapping helper if one exists.
    private static string ToBuiltinTypeString(DriverDataType dt) => dt switch
    {
        DriverDataType.Float64 => "Double",
        _ => dt.ToString(),
    };
}
```
> **Implementation note:** before finalizing `ToBuiltinTypeString`, grep how `EquipmentTagPlan.DataType` is produced from a `DriverDataType` and what strings `OtOpcUaNodeManager.ResolveBuiltInDataType` accepts (e.g. `"Float64"`, `"String"`, `"Int32"`). If `DriverDataType.ToString()` already matches, keep it; otherwise mirror the existing mapping helper. The mapper test asserts NodeIds/structure, not the exact type string — add a focused assertion once the convention is confirmed.
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs \
src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveredNodeMapperTests.cs
git commit -m "feat(otopcua): map discovered nodes under an equipment subfolder"
```
---
## Task 3: Node-manager `RaiseNodesAddedModelChange()`
**Classification:** standard
**Estimated implement time:** ~4 min
**Parallelizable with:** Task 1, Task 2
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` (add a public method near `BuildNodeShapeChangedEvent:1525`)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs` (model on `NodeManagerSurgicalShapeUpdateTests.cs`)
Emit a Part 3 `GeneralModelChangeEvent` with verb `NodeAdded` so already-connected clients can refresh their browse after a runtime add. Mirror the existing `BuildNodeShapeChangedEvent` (verb `DataTypeChanged`) + its `Server.ReportEvent` seam, but build a `NodeAdded` change referencing the equipment subfolder root that gained children.
**Step 1: Write the failing test** — instantiate the node manager as the surgical-shape test does, `EnsureFolder` + `EnsureVariable` a couple of nodes, call `RaiseNodesAddedModelChange(parentNodeId)`, and assert it does not throw and (where the harness exposes reported events, as the surgical test does) that a `GeneralModelChangeEvent` with verb `NodeAdded` was reported. Reuse the surgical test's harness/setup verbatim.
**Step 2: Run to verify it fails** (method missing).
**Step 3: Implement** — add:
```csharp
/// <summary>
/// Tells already-subscribed clients that nodes were added at runtime (discovered-node injection) so
/// they can refresh their browse. Part 3 §8.7.4: the Server object emits a GeneralModelChangeEvent;
/// verb = NodeAdded, affected = the subfolder root that gained children. Uses the same ReportEvent
/// seam as <see cref="BuildNodeShapeChangedEvent"/>; a failure to report (auditing/eventing off) is
/// logged instead of propagated.
/// </summary>
/// <param name="affectedNodeId">The equipment/subfolder NodeId string under which nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId)
{
    GeneralModelChangeEventState modelChange;
    lock (Lock)
    {
        // The event is BUILT while holding Lock (consistent snapshot of _folders/_variables), the
        // same way BuildNodeShapeChangedEvent does it: EventId, SourceNode = ObjectIds.Server,
        // SourceName, Time, Severity, a ModelChangeStructureDataType with
        // Affected = new NodeId(affectedNodeId, NamespaceIndex) and
        // Verb = (byte)ModelChangeStructureVerbMask.NodeAdded, then ClearChangeMasks.
        modelChange = BuildNodesAddedModelChange(affectedNodeId);
    }

    // The event is REPORTED after Lock is released: Server.ReportEvent re-enters the server's own
    // subscription/event path, and holding Lock across it risks a lock-order inversion (follow
    // ReportNodeShapeChangedEvent, NOT ReportConditionEvent, which uses alarm.ReportEvent).
    // Tolerant of eventing being off / no monitored items.
    try
    {
        Server.ReportEvent(SystemContext, modelChange);
    }
    catch (Exception reportFailure)
    {
#pragma warning disable CS0618
        Utils.LogError(reportFailure, "OtOpcUaNodeManager: failed to report GeneralModelChangeEvent(NodeAdded) for {0}", affectedNodeId);
#pragma warning restore CS0618
    }
}
```
> ⚠️ **Lock discipline (corrected 2026-06-26):** BUILD the `GeneralModelChangeEventState` under `lock (Lock)` (copy the field-population block from `BuildNodeShapeChangedEvent` `:1525`, changing only `Verb` → `NodeAdded` and `Affected`), but **REPORT `Server.ReportEvent` OUTSIDE the lock** — exactly like `ReportNodeShapeChangedEvent` / `RevertOptimisticWriteIfNeeded`. `Server.ReportEvent` re-enters the SDK subscription/event path; holding `Lock` across it risks a lock-order-inversion deadlock with a client that has event subscriptions. (An earlier draft of this plan said "keep it inside `lock (Lock)`" — that was wrong for `Server.ReportEvent`; `ReportConditionEvent` is *not* a valid analogue since it uses `alarm.ReportEvent`, the node's own notifier chain.)
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs
git commit -m "feat(otopcua): GeneralModelChangeEvent(NodeAdded) for runtime node adds"
```
---
## Task 4: `AddressSpaceApplier.MaterialiseDiscoveredNodes(...)`
**Classification:** standard
**Estimated implement time:** ~4 min
**Parallelizable with:** none (depends on Tasks 2, 3)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs` (add after `MaterialiseEquipmentTags:304`)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/AddressSpaceApplierTests.cs` (add cases)
Add an idempotent pass that ensures the mapped folders then variables via the existing `SafeEnsureFolder`/`SafeEnsureVariable`, then raises the model-change. Folders MUST be ensured parent-before-child (sort by NodeId depth / segment count).
**Step 1: Write the failing test** — using the applier test's existing fake sink, call `MaterialiseDiscoveredNodes` with 2 folders + 2 read-only variables and assert the sink received `EnsureFolder`/`EnsureVariable` with the right NodeIds/parents, `writable: false`, and that a re-apply is a no-op (idempotent — sink early-returns on existing). Assert `RaiseNodesAddedModelChange` is invoked (extend the fake sink/node-manager double to record it, mirroring how the existing test verifies materialize calls).
**Step 2: Run to verify it fails.**
**Step 3: Implement**
```csharp
/// <summary>
/// Runtime pass that creates driver-discovered (FixedTree) nodes beneath an equipment. The pass is
/// idempotent — the sink's EnsureFolder/EnsureVariable early-return on existing nodes — so it can
/// be repeated cheaply after every address-space rebuild. Folders are ensured shallowest-first so a
/// parent always exists before its children, then variables, then a NodeAdded model-change is
/// raised so connected clients can refresh.
/// </summary>
/// <param name="equipmentRootNodeId">NodeId string reported as the affected node of the model-change.</param>
/// <param name="folders">Folders to ensure; must not be null.</param>
/// <param name="variables">Variables to ensure; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="folders"/> or <paramref name="variables"/> is null.</exception>
public void MaterialiseDiscoveredNodes(
    string equipmentRootNodeId,
    IReadOnlyList<DiscoveredFolder> folders,
    IReadOnlyList<DiscoveredVariable> variables)
{
    ArgumentNullException.ThrowIfNull(folders);
    ArgumentNullException.ThrowIfNull(variables);

    var nothingToMaterialise = folders.Count == 0 && variables.Count == 0;
    if (nothingToMaterialise)
    {
        return;
    }

    // Depth is approximated by the number of '/' separators in the NodeId.
    var parentFirst =
        from folder in folders
        orderby folder.NodeId.Count(ch => ch == '/')
        select folder;

    foreach (var folder in parentFirst)
    {
        SafeEnsureFolder(folder.NodeId, folder.ParentNodeId, folder.DisplayName);
    }

    foreach (var variable in variables)
    {
        SafeEnsureVariable(
            variable.NodeId,
            variable.ParentNodeId,
            variable.DisplayName,
            variable.DataType,
            variable.Writable,
            historianTagname: null,
            isArray: variable.IsArray,
            arrayLength: variable.ArrayLength);
    }

    _sink.RaiseNodesAddedModelChange(equipmentRootNodeId);

    _logger.LogInformation(
        "AddressSpaceApplier: discovered nodes materialised under {Equipment} (folders={Folders}, vars={Vars})",
        equipmentRootNodeId, folders.Count, variables.Count);
}
```
> Confirm `_sink`'s interface exposes `RaiseNodesAddedModelChange` (the sink type wraps `OtOpcUaNodeManager`); add it to the sink interface if the applier talks to an `IAddressSpaceSink` abstraction rather than the concrete manager.
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/AddressSpaceApplierTests.cs
git commit -m "feat(otopcua): applier pass to materialise discovered nodes idempotently"
```
---
## Task 5: `OpcUaPublishActor.MaterialiseDiscoveredNodes` message + handler
**Classification:** standard
**Estimated implement time:** ~4 min
**Parallelizable with:** none (depends on Task 4)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs` (message record near the other records; `Receive<…>` at the block `:217`; handler near `HandleRebuild`)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs` (add a case)
**Step 1: Write the failing test** — with the publish-actor test harness (fake applier), send `MaterialiseDiscoveredNodes(equipmentRoot, folders, variables)` and assert the handler forwards to `_applier.MaterialiseDiscoveredNodes(...)` with the same payload.
**Step 2: Run to verify it fails.**
**Step 3: Implement** — add the message + Receive + handler:
```csharp
/// <summary>Inject driver-discovered nodes (FixedTree) under an equipment at runtime (post-connect).</summary>
/// <param name="EquipmentRootNodeId">NodeId string of the equipment root the nodes are injected under.</param>
/// <param name="Folders">Folders to ensure.</param>
/// <param name="Variables">Variables to ensure.</param>
public sealed record MaterialiseDiscoveredNodes(
string EquipmentRootNodeId,
IReadOnlyList<DiscoveredFolder> Folders,
IReadOnlyList<DiscoveredVariable> Variables);
```
In the Receive block (`:217`, alongside `Receive<RebuildAddressSpace>(HandleRebuild)`):
```csharp
Receive<MaterialiseDiscoveredNodes>(HandleMaterialiseDiscovered);
```
Handler:
```csharp
/// <summary>Forwards a discovered-node materialisation request to the applier unchanged.</summary>
private void HandleMaterialiseDiscovered(MaterialiseDiscoveredNodes msg)
{
    var (equipmentRoot, folders, variables) = msg;
    _applier.MaterialiseDiscoveredNodes(equipmentRoot, folders, variables);
}
```
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs
git commit -m "feat(otopcua): OpcUaPublishActor handles discovered-node materialisation"
```
---
## Task 6: `DriverInstanceActor` post-connect bounded re-discovery
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** none (depends on Task 1; touches actor lifecycle)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (message records area `:60-160`; `Connected()` entry via `InitializeSucceeded:278`; new private async discovery method + a self-scheduled retry tick)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
On reaching `Connected`, if `_driver is ITagDiscovery`, run discovery into a `CapturingAddressSpaceBuilder`, and `Context.Parent.Tell(new DiscoveredNodesReady(_driverInstanceId, nodes))`. Because FOCAS suppresses FixedTree until `FixedTreeCache` populates (~0–2 s), schedule a bounded retry: re-run every ~2 s up to a cap (~30 s / ~15 attempts) **or until the node count stops growing** (whichever first), then stop. `DiscoverAsync` reads in-memory cache → cheap. Reset/cancel the schedule on leaving `Connected` (DisconnectObserved/ForceReconnect) and re-arm on the next `Connected` entry. Use Akka scheduling (`Context.System.Scheduler.ScheduleTellOnce` self-tell of an internal `RediscoverTick`, tracked by an `ICancelable` so it's cancelled on state exit) — do NOT block the actor thread.
**Message records to add** (near the other nested records):
```csharp
/// <summary>Published to the parent (DriverHostActor) after a post-connect discovery pass.</summary>
/// <param name="DriverInstanceId">Id of the driver instance that ran the discovery pass.</param>
/// <param name="Nodes">Nodes captured by that pass.</param>
public sealed record DiscoveredNodesReady(string DriverInstanceId, IReadOnlyList<DiscoveredNode> Nodes);
/// <summary>Internal self-tick driving bounded post-connect re-discovery.</summary>
/// <param name="Generation">Init generation the tick was scheduled under; ticks from a stale generation are ignored.</param>
/// <param name="Attempt">Attempt number of this tick, compared against the attempt cap.</param>
/// <param name="LastCount">Node count captured by the previous pass, used to tell whether the set is still growing.</param>
private sealed record RediscoverTick(int Generation, int Attempt, int LastCount);
```
**Step 1: Write the failing test** — drive a `DriverInstanceActor` with a fake `IDriver` that also implements `ITagDiscovery`, whose `DiscoverAsync` yields 0 nodes on the first ~2 attempts then a non-empty set (simulating FixedTreeCache populating). Bring the actor to `Connected` (send the same init messages the existing `DriverInstanceActorTests` use). Use the TestKit parent probe (`Context.Parent` → the TestKit `TestActor` via `ActorOf` under the testkit, or the existing harness's parent-probe pattern in `DriverInstanceActorTests`) and `ExpectMsg<DiscoveredNodesReady>` — assert the eventually-delivered message carries the non-empty set, and that re-ticks stop after the set stabilises (no infinite stream). Use the TestKit scheduler / `Within` to advance.
**Step 2: Run to verify it fails.**
**Step 3: Implement** — add the discovery kick at the `InitializeSucceeded` Connected transition (after `ResubscribeDesired()`), a `Receive<RediscoverTick>` in `Connected()`, and a `RunDiscoveryAsync` that:
- guards `_driver is ITagDiscovery disc` (else no-op),
- builds a `CapturingAddressSpaceBuilder`, awaits `disc.DiscoverAsync(builder, ct)`,
- `Context.Parent.Tell(new DiscoveredNodesReady(_driverInstanceId, builder.Nodes))`,
- if `attempt < cap` and `builder.Nodes.Count` still growing (or zero), schedules the next `RediscoverTick(_initGeneration, attempt+1, builder.Nodes.Count)` via `ICancelable` (store in a field, cancel on `DetachSubscription`/state exit).
- Tag ticks with `_initGeneration` and ignore stale-generation ticks (mirrors the existing `InitializeSucceeded.Generation` guard) so a reconnect cancels the prior loop.
> Use `ReceiveAsync<RediscoverTick>` (like the other async receives in `Connected()`), and wrap the discovery call in try/catch → log Info + reschedule (bounded). Mirror the existing cancelable-scheduling pattern already used in the actor (grep `Scheduler`/`ICancelable` in this file and `DriverHostActor`).
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs
git commit -m "feat(otopcua): driver-instance post-connect bounded re-discovery"
```
---
## Task 7: `DriverHostActor` — inject discovered nodes (handler + routing + subscribe)
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** none (depends on Tasks 2, 5, 6; touches actor + routing map)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (fields near `_nodeIdByDriverRef`; `Receive<…>` in BOTH receive states `:482` and `:512`; new handler; store `_lastComposition` in `PushDesiredSubscriptions`)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
Add `Receive<DriverInstanceActor.DiscoveredNodesReady>(HandleDiscoveredNodes)` to the two states that already handle `AttributeValuePublished` (`:484`, `:512`). New fields: `_lastComposition` (set at the end of `PushDesiredSubscriptions`) and `_discoveredByDriver` (`Dictionary<string, DiscoveredInjectionPlan>`). The handler:
1. If `_lastComposition` is null → stash nothing / log Debug and return (composition not applied yet; a later `DiscoveredNodesReady` retry will land after apply).
2. Resolve the equipment: `_lastComposition.EquipmentNodes.Where(e => e.DriverInstanceId == id)`. 0 → log Info skip; >1 → log Warning skip (multi-device deferred). Else take its `EquipmentId`.
3. Compute `authoredRefs` = `_lastComposition.EquipmentTags.Where(t => t.DriverInstanceId == id).Select(t => t.FullName)` set.
4. `var plan = DiscoveredNodeMapper.Map(equipmentId, msg.Nodes, authoredRefs);`
5. If `plan.Variables` empty → return (nothing new yet).
6. `_discoveredByDriver[id] = plan;`
7. For each `(ref, nodeId)` in `plan.RoutingByRef`: add to `_nodeIdByDriverRef[(id, ref)]` (the same `HashSet` fan-out structure used in `PushDesiredSubscriptions:1019`).
8. `_opcUaPublishActor.Tell(new OpcUaPublishActor.MaterialiseDiscoveredNodes(equipmentId, plan.Folders, plan.Variables));`
9. Merge the discovered refs into the driver's desired set and re-push: `child.Actor.Tell(new DriverInstanceActor.SetDesiredSubscriptions(union, SubscriptionPublishingInterval, alarmRefs))` where `union` = authored refs already pushed for that driver **plus** `plan.RoutingByRef.Keys`. (Keep the alarmRefs as last pushed.) The actor's `Connected` `SetDesiredSubscriptions` handler immediately re-subscribes (`:340-353`).
**Step 1: Write the failing test** — build a `DriverHostActor` via its existing test harness (`DriverHostActorTests`/`...WriteRoutingTests` show construction with fakes: a fake child/registry, fake OPC publish probe, a composition artifact). Apply a deployment whose composition has one equipment (`EQ-1`, `DriverInstanceId=d1`) + one authored tag, so `_lastComposition` is set and a child `d1` exists. Send `DriverInstanceActor.DiscoveredNodesReady("d1", <fixedtree nodes>)`. Assert: (a) the OPC publish probe received `MaterialiseDiscoveredNodes` with the mapped folders/vars; (b) the child probe received a `SetDesiredSubscriptions` whose refs include both the authored ref and the FixedTree refs; (c) a subsequent `AttributeValuePublished(d1, <fixedtree ref>, value)` routes to an `AttributeValueUpdate` at the mapped NodeId (proves `_nodeIdByDriverRef` updated).
**Step 2: Run to verify it fails.**
**Step 3: Implement** per the steps above. Store `_lastComposition = composition;` at the end of `PushDesiredSubscriptions` (after the existing logic). Reuse the exact fan-out add pattern for `_nodeIdByDriverRef` from `:1019-1045`.
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs
git commit -m "feat(otopcua): inject discovered nodes into the equipment projection on connect"
```
---
## Task 8: `DriverHostActor` — re-inject discovered nodes after a rebuild
**Classification:** high-risk
**Estimated implement time:** ~3 min
**Parallelizable with:** none (depends on Task 7; same file)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (tail of `PushDesiredSubscriptions`)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs` (add a case)
A structural redeploy triggers `RebuildAddressSpace` (full teardown) and `PushDesiredSubscriptions` rebuilds `_nodeIdByDriverRef` from authored tags only — losing the injected FixedTree nodes + mappings. After the existing `PushDesiredSubscriptions` work, **re-apply the cached `_discoveredByDriver`**: for each cached plan, re-add its `RoutingByRef` to `_nodeIdByDriverRef`, re-`Tell` `MaterialiseDiscoveredNodes`, and re-merge its refs into that driver's pushed `SetDesiredSubscriptions`.
**Step 1: Write the failing test** — after Task 7's injection, simulate a second `PushDesiredSubscriptions` (re-apply the same deployment). Assert the OPC publish probe receives `MaterialiseDiscoveredNodes` AGAIN and the child's re-pushed `SetDesiredSubscriptions` still includes the FixedTree refs (i.e. they weren't dropped by the rebuild).
**Step 2: Run to verify it fails** (today the rebuild drops them).
**Step 3: Implement** — extract the per-driver merge-and-materialise into a helper reused by both `HandleDiscoveredNodes` and a new `ReapplyDiscovered()` call at the tail of `PushDesiredSubscriptions` (after `_lastComposition` is set). Guard for the case where the driver no longer exists in `_children` or the equipment was removed (drop that cache entry).
**Step 4: Run to verify it passes.**
**Step 5: Commit**
```bash
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs
git commit -m "feat(otopcua): re-inject discovered nodes after address-space rebuild"
```
---
## Task 9: Integration test — discovered nodes appear + carry values + survive lifecycle
**Classification:** standard
**Estimated implement time:** ~5 min
**Parallelizable with:** none (depends on Tasks 7, 8)
**Files:**
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveryInjectionEndToEndTests.cs`
- (Reuse / extend any existing in-memory `IDriver` test double in the Runtime tests; create a `FakeDiscoverableDriver : IDriver, ITagDiscovery, ISubscribable` if none fits.)
A focused in-process integration test (no docker, no CNC): wire `DriverHostActor` + `OpcUaPublishActor` + a real `AddressSpaceApplier`/node manager (as the publish-actor rebuild tests do) + a fake discoverable+subscribable driver whose `DiscoverAsync` exposes a delayed FixedTree set and whose poll returns values for those refs. Assert end-to-end:
1. After connect + the discovery delay, the node manager has variables at `EQ-…/FOCAS/…`.
2. A poll value for a FixedTree ref surfaces as a Good `AttributeValueUpdate` at the mapped NodeId (no longer `BadWaitingForInitialData`).
3. After a simulated rebuild (re-apply), the nodes + values persist.
> If a full wiring proves too heavy for one test fixture, split into (9a) host→publish materialisation reaching a real node manager, and (9b) value-route smoke — but keep both in this file. Do NOT silently drop the lifecycle assertion; if you cannot wire a real node manager here, log that limitation in the test summary and cover it in Task 10's docker-dev step instead.
**Step 5: Commit**
```bash
git add tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveryInjectionEndToEndTests.cs
git commit -m "test(otopcua): end-to-end discovered-node injection + value flow"
```
---
## Task 10: Build + full suite + docker-dev smoke
**Classification:** small
**Estimated implement time:** ~5 min
**Parallelizable with:** none (depends on all prior)
**Files:** none (verification only; fix wiring if the build/tests surface gaps)
**Steps:**
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → 0 errors, 0 warnings.
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"` → green.
3. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"` → green.
4. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"` → green (no regression).
5. **docker-dev smoke (optional but recommended):** build the docker-dev image, boot `central-1` (fused admin+driver), confirm via logs that a connected discoverable driver injects nodes (`AddressSpaceApplier: discovered nodes materialised`) and that browse shows `EQ-…/FOCAS/…`. (Mirror the symptom-#1 docker-dev confirmation in the investigation plan.)
6. Commit any wiring fixes with a `fix(otopcua):` message.
---
## Task 11: Docs
**Classification:** trivial
**Estimated implement time:** ~3 min
**Parallelizable with:** none (depends on Task 10)
**Files:**
- Modify: `docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md` (mark symptom #2 / the FixedTree feature done, link this plan + the design doc)
- Modify: `docs/deployments/wonder-app-vd03-makino-z-34184.md` (note FixedTree now surfaces under `EQ-…/FOCAS/…`)
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (status → Implemented)
**Step: Commit**
```bash
git add docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md \
docs/deployments/wonder-app-vd03-makino-z-34184.md \
docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md
git commit -m "docs(otopcua): record FixedTree-under-Equipment injection feature"
```
---
## Live deploy (post-plan, with user confirmation)
Following the symptom-#1 pattern (self-contained publish-overlay → wonder): after the suite is green and docker-dev confirms, **confirm with the user before deploying to the production CNC node**, then deploy and browse `EQ-3686c0272279/FOCAS/Identity/SeriesNumber` + `…/Axes/X/AbsolutePosition` (assert status Good — values may be 0 on the idle machine). Live deploy is explicitly NOT part of the build/test gate.
## Follow-ups (out of scope, documented)
- Discovered **alarms** injection; **multi-device-per-driver-instance** equipment mapping; **writable** discovered nodes.
- Reconcile the AdminUI↔driver FOCAS config-format mismatch (series-as-number, scheme-less host) at the AdminUI source.
- Shared `AddZbSerilog` not setting static `Serilog.Log.Logger` (latent across all 3 apps).
@@ -0,0 +1,23 @@
{
"planPath": "docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection.md",
"tasks": [
{"id": 1, "subject": "Task 1: DiscoveredNode DTO + CapturingAddressSpaceBuilder", "status": "completed"},
{"id": 2, "subject": "Task 2: DiscoveredNodeMapper + materialize DTOs", "status": "completed", "blockedBy": [1]},
{"id": 3, "subject": "Task 3: NodeManager RaiseNodesAddedModelChange", "status": "completed"},
{"id": 4, "subject": "Task 4: AddressSpaceApplier.MaterialiseDiscoveredNodes", "status": "completed", "blockedBy": [2, 3]},
{"id": 5, "subject": "Task 5: OpcUaPublishActor.MaterialiseDiscoveredNodes message+handler", "status": "completed", "blockedBy": [4]},
{"id": 6, "subject": "Task 6: DriverInstanceActor post-connect bounded re-discovery", "status": "completed", "blockedBy": [1]},
{"id": 7, "subject": "Task 7: DriverHostActor inject discovered nodes", "status": "completed", "blockedBy": [2, 5, 6]},
{"id": 8, "subject": "Task 8: DriverHostActor re-inject after rebuild", "status": "completed", "blockedBy": [7]},
{"id": 9, "subject": "Task 9: End-to-end discovered-node injection test", "status": "completed", "blockedBy": [7, 8]},
{"id": 10, "subject": "Task 10: Build + full suite + docker-dev smoke", "status": "completed", "blockedBy": [9]},
{"id": 11, "subject": "Task 11: Docs", "status": "completed", "blockedBy": [10]}
],
"nativeTaskIds": {
"1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26,
"7": 27, "8": 28, "9": 29, "10": 30, "11": 31
},
"lastUpdated": "2026-06-26T00:00:00Z",
"status": "offline-complete; live wonder validation pending",
"branch": "feat/focas-fixedtree-equipment-injection"
}
@@ -0,0 +1,234 @@
# OtOpcUa — FixedTree-injection follow-ups (design)
**Date:** 2026-06-26
**Status:** ✅ Implemented (2026-06-26) — all five follow-ups (A–E) built via subagent-driven development
(16 commits `c2c368dc`..`0074f37a` on `feat/focas-fixedtree-equipment-injection`; every task spec+code
reviewed, high-risk tasks with serial Opus reviews). Offline suites green: Runtime.Tests 331, OpcUaServer.Tests 319,
FOCAS 248 + AbLegacy/composer additions; `dotnet build` 0 errors, production `src/` 0 warnings (TreatWarningsAsErrors).
No DB migration and no deployment-artifact wire-format change were needed (E is projection-only — the columns +
`Devices` array were already serialized). Live wonder re-validation of the single-device FOCAS path is optional/user-gated
(the base feature's live path is unchanged by these follow-ups).
**Companion to:** [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md)
(the base feature — ✅ built + live-validated on `wonder-app-vd03`). This design works through the
follow-ups that feature's review chain surfaced.
**Branch:** continue on `feat/focas-fixedtree-equipment-injection` (stacked on `fix/focas-poll-io-serialization`,
local/unpushed — standing rule is "commit/push only when asked").
---
## Scope (locked with the user 2026-06-26)
The user selected **all five** items below. The base feature's explicit non-goals — discovered-**alarm**
injection and **writable** discovered nodes — remain out of scope (locked design decisions, untouched).
| # | Follow-up | Size | Notes |
|---|---|---|---|
| A | Hardcoded 30 s discovery timeout → injectable | trivial | behavior-preserving |
| B | Re-discovery opt-in/policy gate per driver | moderate | back-compat default |
| C | Config-unchanged driver→equipment rebind re-triggers discovery | moderate | reverses a deliberate `won't-fix` |
| D | De-dup the double `SetDesiredSubscriptions` during redeploy | small | one extra unsub/resub blip today |
| E | Lift the ≥1-authored-tag requirement + multi-device-per-driver | largest | **projection-only, no DB migration** |
## Key discovery that shapes E
The Config-DB **already** models the equipment→driver(→device) association as first-class data — no schema
change is needed for E:
- `Equipment.DriverInstanceId` (`string?`, made nullable by migration `20260608104706_NullableEquipmentDriverInstanceId`)
- `Equipment.DeviceId` (`string?`) — FK to a multi-device driver's device
- `Device` is a first-class entity (`DeviceId`, `DriverInstanceId`, schemaless `DeviceConfig` JSON with host)
- The AdminUI equipment editor already exposes an optional driver pick (`EquipmentInput.DriverInstanceId`)
The **only** gap is the runtime projection: `EquipmentNode` is `(EquipmentId, DisplayName, UnsLineId)` and drops
`DriverInstanceId`/`DeviceId`, so the injector (`DriverHostActor.HandleDiscoveredNodes`) can only resolve the
equipment by inferring it from authored `EquipmentTags` — hence the ≥1-tag requirement. E closes that gap in the
projection + resolver, not the schema.
---
## A. Discovery timeout → injectable
`DriverInstanceActor.HandleRediscoverAsync` hardcodes `new CancellationTokenSource(TimeSpan.FromSeconds(30))`
while the rediscover interval + attempt-cap are already constructor parameters. Add a
`rediscoverDiscoverTimeout` (`TimeSpan`, default `TimeSpan.FromSeconds(30)`) to the ctor and the `Props`
factory; use the field instead of the literal. Pure consistency fix; default preserves behavior.
## B. Re-discovery opt-in / policy gate
**Problem:** `StartDiscovery()` runs the bounded retry loop for **every** `ITagDiscovery` driver on every
(re)connect. FOCAS needs it (its `FixedTreeCache` fills ~0–2 s *after* connect, so a single early pass would
capture an empty/partial tree). A driver that browses its full shape **synchronously** inside `DiscoverAsync`
(OpcUaClient, TwinCAT, AB) needs at most **one** pass — the 15×2 s retry is wasted (potentially heavy) network I/O.
**Decision:** a per-driver **policy**, declared in code (driver "heaviness" is a property of the driver *type*,
needs no DB/AdminUI plumbing, lowest risk). Add a default-implemented member to `ITagDiscovery`:
```csharp
/// <summary>
/// Post-connect re-discovery policy for a driver. <c>UntilStable</c>: keep retrying on the
/// re-discover interval up to the attempt cap, or until the captured set is non-empty and stable.
/// <c>Once</c>: run exactly one discovery pass on connect, then stop. <c>Never</c>: do not kick
/// post-connect discovery at all.
/// </summary>
public enum DiscoveryRediscoverPolicy { UntilStable, Once, Never }
public interface ITagDiscovery
{
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable; // default = today's behavior
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
}
```
- **`UntilStable`** (default, unchanged) — today's loop: retry every `_rediscoverInterval` up to
`_rediscoverMaxAttempts` or until the captured signature is non-empty and stable.
- **`Once`** — kick exactly one discovery pass on connect, emit one `DiscoveredNodesReady`, then stop.
- **`Never`** — no post-connect discovery kick at all.
**Driver assignments:**
- **FOCAS** → `UntilStable` (explicit; it genuinely needs the retry).
- **OpcUaClient, TwinCAT, AbCip, AbLegacy** → `Once` (they discover synchronously in `DiscoverAsync`; one pass
on connect injects their tree, the retry loop only added cost). Any driver not overriding the default keeps
`UntilStable`, so this is a strict no-regression change.
**Mechanism:** `DriverInstanceActor.StartDiscovery` reads `((ITagDiscovery)_driver).RediscoverPolicy`. `Never`
→ return without scheduling. `Once`/`UntilStable` → schedule the first `RediscoverTick`; `HandleRediscoverAsync`
stops after the first pass when the policy is `Once` (instead of evaluating stop-on-stable).
**Alternative considered (rejected):** a per-instance JSON flag parsed by the host + AdminUI. More flexible but
adds artifact/AdminUI plumbing for a knob whose correct value is type-uniform.
## C. Config-unchanged rebind re-triggers discovery
**Problem:** when a redeploy rebinds a driver to a new equipment **without** a `DriverConfig` change,
`PushDesiredSubscriptions`' re-inject tail correctly **drops** the stale cached plan (a stale `EQ-1`-scoped graft
under `EQ-2` would be worse), but `ReconcileDrivers` only restarts a child on a `DriverConfig` change — so a
config-unchanged child is never reconnected and the FixedTree stays absent under the new equipment until the
driver's next natural reconnect/restart.
The base feature deliberately did **not** add a re-trigger here, to avoid coupling the subscription pass to
driver-lifecycle control. This follow-up reverses that — but cleanly, because the trigger is a **discovery**
action, not lifecycle control (no stop/restart), and it is idempotent.
**Decision:** add a `DriverInstanceActor.TriggerRediscovery` message. In the re-inject tail, the two branches
that `Remove` a cached plan because of a rebind/loss also `Tell` that driver's child `TriggerRediscovery`. The
child kicks a fresh `RediscoverTick` (current `_initGeneration`) **iff it is in the `Connected` state**;
otherwise it no-ops (its eventual reconnect re-discovers anyway). The discovery pass re-emits
`DiscoveredNodesReady`, which resolves against the **new** composition (`_lastComposition`) and grafts under the
new equipment. The re-trigger honors B's policy (`Never` drivers do not re-discover; `Once`/`UntilStable` run
their normal pass(es)).
Update the inline comment at the drop site and the follow-up note in the base design doc to record the new
behavior.
## D. De-dup the double `SetDesiredSubscriptions`
**Problem:** during an in-process redeploy, a cached driver receives two `SetDesiredSubscriptions`:
the bulk authored-only send in `PushDesiredSubscriptions`, then the authored ∪ discovered union from
`ApplyDiscoveredPlan` (the re-inject tail). The first send forces the child to unsubscribe the whole handle
(authored tags included) then the second re-subscribes — one extra blip per cached driver per redeploy.
**Decision:** in the bulk loop, **skip** the send for any driver that has a `_discoveredByDriver` entry — the
re-inject tail sends their complete union. **Critical fallback:** the re-inject tail can still *drop* a cached
plan (rebind/loss, see C); when it does, it must send the **authored-only** set for that driver so its authored
subscriptions are not lost. Net invariant: every driver receives exactly **one** `SetDesiredSubscriptions` per
redeploy.
## E. Lift the ≥1-authored-tag requirement + multi-device
No DB migration — projection + resolver only.
**E1 — projection (`AddressSpaceComposer`).** Extend `EquipmentNode`:
```csharp
public sealed record EquipmentNode(
string EquipmentId,
string DisplayName,
string UnsLineId,
string? DriverInstanceId = null, // from Equipment.DriverInstanceId
string? DeviceId = null, // from Equipment.DeviceId
string? DeviceHost = null); // resolved at projection time: Equipment.DeviceId -> Device.DeviceConfig host
```
`DriverInstanceId`/`DeviceId` are copied straight off the `Equipment` row. `DeviceHost` is resolved by joining
`Equipment.DeviceId → Device` and parsing the host out of that `Device`'s schemaless `DeviceConfig` JSON, so the
resolver can match it against a discovered device-host folder without re-reading the DB. All three are nullable;
existing single-equipment behavior is unaffected when they're null.
**E2 — resolver (`DriverHostActor.HandleDiscoveredNodes`, and the redeploy re-inject tail).** Replace tag-only
equipment resolution with:
> **candidates** = { equipments where `EquipmentNode.DriverInstanceId == driverId` } **∪**
> { equipments inferred from authored `EquipmentTags` for `driverId` } (keeps today's path working)
- **0 candidates** → log Info, skip (unchanged).
- **1 candidate** → graft all discovered nodes under it (today's single-device behavior, now also works with
**zero** authored tags because the equipment-level `DriverInstanceId` resolves it). The device-host folder is
still collapsed (single device).
- **>1 candidates (multi-device)** → partition discovered nodes by their device-host folder segment
(`DiscoveredNode.FolderPathSegments[1]`) and graft each device's subtree under the equipment whose
`DeviceHost` matches that segment. A device-host with no matching equipment is **warn-skipped** (its subtree is
not grafted) rather than mis-grafted. The mapper's existing device-host collapse already disables itself when
≥2 distinct device-host segments are present, so multi-device paths retain the device-host level and don't
collide.
**⚠️ Implementation risk (E2 multi-device only):** the partition join is a **host string** match — the driver's
emitted device-host folder segment (FOCAS uses `device.HostAddress`) must equal the equipment's projected
`DeviceHost` (parsed from `DeviceConfig`). Both ultimately derive from the same device configuration, but the
string forms must be normalized to match (e.g. `host:port`). The warn-skip fallback makes a mismatch *safe* (no
mis-graft, authored tags + single-device paths unaffected); a normalization helper + a unit test pin the formats
together. Single-device deployments (the validated FOCAS `z-34184` case) take the "1 candidate" path and are
**not** exposed to this risk.
---
## Data flow (unchanged)
E changes only *which equipment* a discovered node is grafted under and *whether* an equipment with no authored
tags participates. Once the NodeId is assigned, the materialize → subscribe → poll → push value path is exactly
the base feature's path; B/C/D change *when/how often* discovery runs and *how many* subscription pushes occur,
not the value path.
## Error handling
- B `Never` driver → no discovery, authored tags unaffected.
- C re-trigger on a non-`Connected` child → no-op (safe; reconnect re-discovers).
- D dropped-plan fallback → authored-only send, so a rebind/loss never strands a driver's authored subscriptions.
- E multi-device unmatched device-host → Warning + skip that device's subtree; other devices + authored tags
unaffected. >1 candidate with no `DeviceHost` data anywhere → falls back to the base feature's warn+skip
(no regression).
## Testing
- **A:** ctor/`Props` wires the timeout; default is 30 s (assert via a short injected timeout in an existing
rediscover test).
- **B:** `Never` → no `DiscoveredNodesReady`; `Once` → exactly one even when the captured set would keep growing;
`UntilStable` → today's loop (regression). FOCAS reports `UntilStable`; the four network drivers report `Once`.
- **C:** rebind drop branch `Tell`s `TriggerRediscovery`; `Connected` child re-discovers and re-emits;
non-`Connected` child no-ops; re-trigger respects a `Never` policy.
- **D:** single-send invariant — a cached driver gets exactly one `SetDesiredSubscriptions` on redeploy
(union when applied; authored-only when the plan is dropped).
- **E1:** `EquipmentNode` projection carries `DriverInstanceId`/`DeviceId`/`DeviceHost`; `DeviceHost` resolves via
the `Device` join + `DeviceConfig` host parse; nulls when unset.
- **E2:** tag-less graft (driver-level link, 0 authored tags); single-candidate unchanged (collapse retained);
multi-device partition maps each device-host to the right equipment; unmatched device-host → warn-skip;
host-string normalization.
- **Regression:** Runtime.Tests, OpcUaServer.Tests, and the FOCAS suite stay green; the validated single-device
FOCAS injection path is unchanged.
- Live wonder re-validation of the single-device FOCAS path is **optional** and user-gated (the base feature is
already live-validated; these follow-ups don't alter that path's runtime behavior).
## Scope / non-goals
- **In:** A–E above.
- **Out (still locked):** discovered-**alarm** injection; **writable** discovered nodes.
## Touched code (anticipated)
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs` — `DiscoveryRediscoverPolicy` enum + default member.
- Driver classes (`FocasDriver`, `OpcUaClientDriver`, `TwinCATDriver`, `AbCipDriver`, `AbLegacyDriver`) — override
`RediscoverPolicy`.
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` — injectable timeout (A); policy-gated
`StartDiscovery`/`HandleRediscoverAsync` (B); `TriggerRediscovery` message (C).
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` — resolver union + multi-device partition (E2);
re-trigger on rebind drop (C); bulk-send skip + dropped-plan fallback (D).
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceComposer.cs` — `EquipmentNode` projection (E1).
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs` — multi-device partition support (E2).
- Tests under `tests/.../Runtime.Tests` and `tests/.../OpcUaServer.Tests`.
## Task tracking
Implementation tasks to be generated by writing-plans from this design.
@@ -0,0 +1,358 @@
# FixedTree-injection follow-ups — Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:subagent-driven-development (or executing-plans) to implement this plan task-by-task.
**Goal:** Implement the five approved follow-ups to the FixedTree-under-Equipment dynamic-injection feature: (A) injectable discovery timeout, (B) per-driver re-discovery policy gate, (C) re-trigger discovery on a config-unchanged rebind, (D) de-dup the double `SetDesiredSubscriptions`, and (E) lift the ≥1-authored-tag requirement + support multi-device-per-driver.
**Architecture:** Akka.NET actor pipeline. `DriverInstanceActor` runs post-connect discovery and publishes `DiscoveredNodesReady`; `DriverHostActor` resolves the bound equipment, maps discovered nodes via `DiscoveredNodeMapper`, caches a plan, materialises via `OpcUaPublishActor`, and merges subscription refs. Composition is built by `AddressSpaceComposer.Compose` (pure, from entities) and mirrored by `DeploymentArtifact` (decode, from the sealed JSON artifact) — the two MUST stay byte-parity-equal. The deployment artifact already serialises full `Equipment` + `Device` entities, so E needs **no DB migration and no artifact wire-format change** — only decode/projection reads.
**Tech Stack:** .NET 10, C# (default interface members, collection expressions), Akka.NET, xUnit. Build: `dotnet build ZB.MOM.WW.OtOpcUa.slnx` (TreatWarningsAsErrors). Test (macOS — run filtered, NOT full-solution; the net48 Wonderware testhost can't run on macOS):
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"`
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"`
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"`
**Design:** [`2026-06-26-otopcua-fixedtree-followups-design.md`](2026-06-26-otopcua-fixedtree-followups-design.md). Branch: `feat/focas-fixedtree-equipment-injection` (continue on it; commit per task; do NOT push/merge — standing rule).
**Out of scope (locked):** discovered-alarm injection; writable discovered nodes.
---
## Execution order & parallelism
Two files are each touched by multiple tasks and MUST be edited serially:
- `DriverInstanceActor.cs`: **Task 1 → Task 3 → Task 4**
- `DriverHostActor.cs`: **Task 6 → Task 7 → Task 8 → Task 9**
Independent file sets that can run concurrently with the above: **Task 2** (`ITagDiscovery` + 5 driver files) and **Task 5** (`AddressSpaceComposer.cs` + `DeploymentArtifact.cs`).
Dependency summary: T3 ⟵{T1,T2}; T4 ⟵T3; T6 ⟵T5; T7 ⟵{T4,T6}; T8 ⟵T7; T9 ⟵{T5,T8}; T10 ⟵T9; T11 ⟵{T2,T4,T9,T10}.
---
### Task 1: Injectable discovery timeout (follow-up A)
**Classification:** small
**Estimated implement time:** ~3 min
**Parallelizable with:** Task 2, Task 5
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (ctor ~244-259, `Props` ~195-210, fields ~133-137, `HandleRediscoverAsync` ~765)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
**Context:** `HandleRediscoverAsync` hardcodes `using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));` (line 765). The rediscover interval + attempt-cap are already ctor params (`_rediscoverInterval`, `_rediscoverMaxAttempts`). Add a sibling param for the per-pass discovery timeout, default-preserving.
**Step 1 — Failing test:** add a test asserting that when constructed with a very short discovery timeout and an `ITagDiscovery` whose `DiscoverAsync` blocks, the pass cancels by the injected timeout (e.g. `DiscoveredNodesReady` carries an empty set within the short window) rather than waiting 30 s. Reuse the existing fake `ITagDiscovery` driver in this test file (search it for the existing discovery-actor fake; mirror that pattern). If a fully deterministic timeout test is too flaky, instead assert the wiring: a new public `DefaultRediscoverDiscoverTimeout` constant exists and equals 30 s, and the ctor/`Props` accept the param.
**Step 2 — Verify it fails:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~DriverInstanceActorDiscoveryTests"` → fails to compile / fails assertion.
**Step 3 — Implement:**
- Add `public static readonly TimeSpan DefaultRediscoverDiscoverTimeout = TimeSpan.FromSeconds(30);` next to the other discovery defaults (~line 36-39).
- Add a field `private readonly TimeSpan _rediscoverDiscoverTimeout;` (~133-137).
- Add ctor param `TimeSpan? rediscoverDiscoverTimeout = null` (after `rediscoverMaxAttempts`); assign `_rediscoverDiscoverTimeout = rediscoverDiscoverTimeout ?? DefaultRediscoverDiscoverTimeout;`.
- Add the matching optional param to `Props` and forward it.
- In `HandleRediscoverAsync`, replace `TimeSpan.FromSeconds(30)` with `_rediscoverDiscoverTimeout`.
**Step 4 — Verify:** test passes; `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): make FixedTree re-discovery per-pass timeout injectable (follow-up A)"`
---
### Task 2: Re-discovery policy enum + ITagDiscovery member + driver overrides (follow-up B, part 1)
**Classification:** standard
**Estimated implement time:** ~5 min
**Parallelizable with:** Task 1, Task 5
**Files:**
- Modify: `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs`
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS/FocasDriver.cs`
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs`
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriver.cs`
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs`
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs`
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs` (or a small new test next to the FOCAS driver tests asserting `FocasDriver` reports `UntilStable`)
**Context:** `ITagDiscovery` (Core.Abstractions) currently has only `DiscoverAsync`. Add a policy the actor (Task 3) honors. Default = today's behavior so any non-overriding driver is unchanged.
**Step 1 — Failing test:** assert `new FocasDriver(...).RediscoverPolicy == DiscoveryRediscoverPolicy.UntilStable` and that one network driver (e.g. `OpcUaClientDriver`) reports `Once`. (Construct via the simplest available ctor/fake; if drivers are hard to construct standalone, assert the enum + default member exist and compile, plus a focused test on FOCAS.)
**Step 2 — Verify it fails:** compile failure (enum/member absent).
**Step 3 — Implement:**
- In `ITagDiscovery.cs`, add the enum + a default-implemented member:
```csharp
/// <summary>How aggressively the host re-runs post-connect discovery for this driver.</summary>
public enum DiscoveryRediscoverPolicy
{
/// <summary>Retry every interval up to the cap or until the captured set is non-empty and stable
/// (for drivers whose discovered shape fills in asynchronously after connect, e.g. FOCAS FixedTree).</summary>
UntilStable,
/// <summary>Run exactly one discovery pass on connect (drivers that discover synchronously in DiscoverAsync).</summary>
Once,
/// <summary>Never run post-connect discovery.</summary>
Never,
}
public interface ITagDiscovery
{
/// <summary>Post-connect re-discovery policy. Default preserves the original retry-until-stable behavior.</summary>
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
}
```
- `FocasDriver`: add `public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;` (explicit — it genuinely needs the retry loop).
- `OpcUaClientDriver`, `TwinCATDriver`, `AbCipDriver`, `AbLegacyDriver`: add `public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;` — these discover synchronously inside `DiscoverAsync`, so one pass on connect suffices; the 15× retry was wasted (potentially heavy) work. **Before setting `Once`, confirm each driver's `DiscoverAsync` returns its complete set synchronously** (read each `DiscoverAsync`); if any populates a cache asynchronously after connect like FOCAS, leave it `UntilStable` and note why in a comment.
**Step 4 — Verify:** test passes; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): add ITagDiscovery.RediscoverPolicy + per-driver assignments (follow-up B)"`
---
### Task 3: DriverInstanceActor honors RediscoverPolicy (follow-up B, part 2)
**Classification:** standard
**Estimated implement time:** ~5 min
**Parallelizable with:** none (serial after Task 1 on the same file; needs Task 2's enum)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (`StartDiscovery` ~736-740, `HandleRediscoverAsync` ~754-795)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
**Context:** `StartDiscovery()` currently kicks the loop for every `ITagDiscovery` driver. `HandleRediscoverAsync` schedules the next tick unless stable/capped. Gate both on the driver's `RediscoverPolicy`.
**Step 1 — Failing tests (3):**
1. A fake `ITagDiscovery` driver reporting `Never` → no `DiscoveredNodesReady` is ever published after connect.
2. A fake reporting `Once` whose captured set would keep GROWING across passes → exactly ONE `DiscoveredNodesReady` and no further tick scheduled.
3. A fake reporting `UntilStable` → existing behavior (retries until stable/cap) — keep/extend the current passing test.
**Step 2 — Verify they fail:** the `Never`/`Once` tests fail (today everything retries-until-stable).
**Step 3 — Implement:**
- In `StartDiscovery()`: after the `if (_driver is not ITagDiscovery discovery) return;` guard, read the policy; `if (discovery.RediscoverPolicy == DiscoveryRediscoverPolicy.Never) return;` before scheduling the first `RediscoverTick`.
- In `HandleRediscoverAsync`: after publishing `DiscoveredNodesReady`, when the policy is `Once`, do NOT schedule another tick (log Debug "policy=Once, single pass" and return). When `UntilStable`, keep today's stop-on-stable + cap logic. (Read the live policy via `((ITagDiscovery)_driver).RediscoverPolicy`.)
- Keep the generation guard intact.
**Step 4 — Verify:** the 3 tests pass; the full `DriverInstanceActorDiscoveryTests` + `Runtime.Tests` suite stays green; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): DriverInstanceActor honors RediscoverPolicy (Never/Once/UntilStable) (follow-up B)"`
---
### Task 4: TriggerRediscovery message + handler (follow-up C, part 1)
**Classification:** standard
**Estimated implement time:** ~4 min
**Parallelizable with:** none (serial after Task 3 on the same file)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (message decls near `RediscoverTick` ~110-115; add a `Connected`-state receive)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
**Context:** Task 7 (`DriverHostActor`) will `Tell` a driver child to re-run discovery after a rebind. The child must accept that message and only act when `Connected`.
**Step 1 — Failing tests (2):**
1. Send `TriggerRediscovery` to an actor whose driver is `Connected` → it runs a discovery pass and publishes `DiscoveredNodesReady` (respecting policy: a `Never` driver does NOT).
2. Send `TriggerRediscovery` before connect / while not `Connected` → no `DiscoveredNodesReady`, no crash (no-op).
**Step 2 — Verify they fail:** message type doesn't exist.
**Step 3 — Implement:**
- Add `public sealed record TriggerRediscovery();` near the other public messages.
- In the `Connected` state, add a receive for `TriggerRediscovery` that calls `StartDiscovery()` (which already honors policy + the `ITagDiscovery` guard, and uses the current `_initGeneration`).
- In other states, either don't register the receive (so it's unhandled = no-op) or register a no-op. Prefer registering only in `Connected` so a non-connected child silently ignores it (verify the actor's state-machine style — match how other state-scoped messages are handled). Ensure no `Unhandled`-logging noise; if the actor logs unhandled messages, add an explicit ignore in the relevant states.
**Step 4 — Verify:** both tests pass; suite green; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): DriverInstanceActor.TriggerRediscovery message (follow-up C)"`
---
### Task 5: EquipmentNode carries DriverInstanceId/DeviceId/DeviceHost (follow-up E, projection)
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** Task 1, Task 2
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceComposer.cs` (`EquipmentNode` record line 61; projection ~326-332; `Compose` signatures ~281-312)
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs` (`ReadEquipmentNode` ~810-820; the equipment decode call ~204; `Empty()` ~362-367; add a `Devices`-array → `DeviceId`→host map)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/` (composer projection test) and the existing artifact-decode/parity test for `EquipmentNode` (search `tests/` for `ReadEquipmentNode`/`EquipmentNodes`/`DeploymentArtifact` coverage; if a Compose-vs-decode parity test exists, extend it)
**Context:** The artifact already serialises full `Equipment` rows (incl. nullable `DriverInstanceId`, `DeviceId`) and a full `Devices` array (each `Device` has `DeviceId` + schemaless `DeviceConfig` JSON containing FOCAS's `HostAddress`). `Compose` (pure) and `DeploymentArtifact` (decode) MUST produce identical `EquipmentNode`s. `_lastComposition` (used by the resolver) always comes from decode, but parity is still required by tests.
**Step 1 — Failing tests:**
- Composer: given an `Equipment` with `DriverInstanceId="d1"`, `DeviceId="dev1"`, and a `Device{DeviceId="dev1", DeviceConfig={"HostAddress":"10.0.0.5:8193"}}`, `Compose(...)` yields `EquipmentNode` with `DriverInstanceId=="d1"`, `DeviceId=="dev1"`, `DeviceHost=="10.0.0.5:8193"`; with no device assigned → all three null.
- Decode: an artifact JSON whose `Equipment` element has those fields + a matching `Devices` element decodes to the same `EquipmentNode`.
**Step 2 — Verify they fail:** `EquipmentNode` has no such fields.
**Step 3 — Implement:**
- Extend the record (defaulted params keep all existing call sites compiling):
```csharp
public sealed record EquipmentNode(
string EquipmentId,
string DisplayName,
string UnsLineId,
string? DriverInstanceId = null,
string? DeviceId = null,
string? DeviceHost = null);
```
- Add a shared host-extraction helper usable by BOTH sides (place it where both can call it without a new project dependency — e.g. a `public static string? TryExtractDeviceHost(string? deviceConfigJson)` on `AddressSpaceComposer`, parsing the top-level `"HostAddress"` string from the `DeviceConfig` JSON; return null if absent/unparseable). Add a normalization step (trim; lower-case host) and DOCUMENT that the discovered device-host folder segment must be normalized the same way in Task 9.
- `Compose`: add an optional `IReadOnlyList<Device>? devices = null` param to BOTH overloads (forward from the 5-arg overload as empty). Build `deviceHostById = devices.ToDictionary(d => d.DeviceId, d => TryExtractDeviceHost(d.DeviceConfig))`, treating a null `devices` as an empty list so call sites that rely on the default argument do not throw. In the equipment projection, set `DriverInstanceId: e.DriverInstanceId`, `DeviceId: e.DeviceId`, `DeviceHost: e.DeviceId is null ? null : deviceHostById.GetValueOrDefault(e.DeviceId)`.
- `DeploymentArtifact`: read the `Devices` array (decode `DeviceId` + `DeviceConfig`) into a `DeviceId`→host map using the SAME `TryExtractDeviceHost` helper; thread it into `ReadEquipmentNode` (change its signature to accept the map, or do a post-pass) so it reads `DriverInstanceId`/`DeviceId` from the element and resolves `DeviceHost` from the map. Update `Empty()` only if its arity changed (it won't — record params are defaulted).
- **Parity:** ensure the decode-side host normalization is byte-identical to `Compose`'s (same helper). If a Compose-vs-decode parity test exists, pass the same `Devices` to `Compose` in that test.
**Step 4 — Verify:** new tests pass; `OpcUaServer.Tests` + `Runtime.Tests` green; build 0 warnings. **Run the existing artifact-parity test** — it MUST stay green.
**Step 5 — Commit:** `git commit -m "feat(otopcua): EquipmentNode carries DriverInstanceId/DeviceId/DeviceHost (follow-up E projection)"`
---
### Task 6: DriverHostActor — cache-as-dict + driver-level equipment resolution (follow-up E, part 1)
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** none (serial: first DriverHostActor task; needs Task 5)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`_discoveredByDriver` field ~168; `HandleDiscoveredNodes` ~580-639; `ApplyDiscoveredPlan` ~658-701; `RoutingEquals`; redeploy re-inject tail ~1247-1290)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
**Context:** Today `_discoveredByDriver` is `Dictionary<string, DiscoveredInjectionPlan>` (one plan per driver) and equipment is resolved ONLY from authored `EquipmentTags`. This task (1) changes the cache value to a per-equipment map so Task 9 can add multiple equipments, and (2) makes resolution also use the equipment-level driver link so a driver with an assigned equipment but ZERO authored tags still grafts. **Still requires exactly one resolved equipment here** (multi-device is Task 9) — >1 keeps the current warn+skip.
**Step 1 — Failing tests:**
- Tag-less graft: composition has an `EquipmentNode{DriverInstanceId="d1"}` with NO authored `EquipmentTags` for `d1`; `DiscoveredNodesReady("d1", nodes)` → nodes graft under that equipment (today: skipped with "no equipment/authored tags").
- Regression: the existing single-equipment-with-authored-tags test still grafts identically (collapse retained).
**Step 2 — Verify it fails:** tag-less case is skipped today.
**Step 3 — Implement:**
- Change `_discoveredByDriver` to `Dictionary<string, IReadOnlyDictionary<string, DiscoveredInjectionPlan>>` (driverId → (equipmentId → plan)). Update ALL readers: `HandleDiscoveredNodes` short-circuit, `ApplyDiscoveredPlan`, and the redeploy re-inject tail must iterate the inner map.
- New resolution in `HandleDiscoveredNodes`: candidate equipments =
`_lastComposition.EquipmentNodes.Where(e => e.DriverInstanceId == driverId).Select(e => e.EquipmentId)`
**∪** (union with) the existing authored-tag-derived set. Distinct.
- 0 → log Info, skip (unchanged message).
- 1 → resolve `equipmentId`; authoredRefs for that driver as today; `DiscoveredNodeMapper.Map(equipmentId, nodes, authoredRefs)`; cache as a 1-entry inner map; apply.
- >1 → for THIS task, keep `_log.Warning(... "multi-equipment-per-driver is handled in the multi-device path")` + skip. (Task 9 replaces this branch.)
- `ApplyDiscoveredPlan`: keep applying a single `(equipmentId, plan)`; callers now iterate the inner map and call it per entry. The subscription-merge union must include ALL discovered routing keys across the driver's plans (so a multi-plan driver subscribes every device's refs). Keep the authored value/alarm ref computation.
- `RoutingEquals` short-circuit: compare the FULL new inner-map routing against the cached inner-map routing (skip re-apply only when every equipment's routing is unchanged).
- Redeploy re-inject tail: iterate `_discoveredByDriver`; for each driver, re-resolve candidates from the CURRENT composition; per cached `(equipmentId, plan)` entry, keep the existing drop rules (equipment no longer resolves / plan NodeIds not scoped to `equipmentId`) but applied per-entry; re-apply surviving entries. (Task 7 will add the re-trigger on drop; Task 8 the de-dup.)
**Step 4 — Verify:** new + existing `DriverHostActorDiscoveryTests` green; `Runtime.Tests` green; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): driver-level equipment resolution + per-equipment discovered-plan cache (follow-up E)"`
---
### Task 7: DriverHostActor — re-trigger discovery on rebind drop (follow-up C, part 2)
**Classification:** high-risk
**Estimated implement time:** ~4 min
**Parallelizable with:** none (serial after Task 6; needs Task 4's message)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (redeploy re-inject tail drop branches ~1264-1288; update the deliberate-`won't-fix` comment)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
**Context:** When the re-inject tail DROPS a cached plan because the equipment rebound/no-longer-resolves, the FixedTree stays absent under the new equipment until the driver's next natural reconnect. Re-trigger discovery so it re-grafts promptly.
**Step 1 — Failing test:** simulate a redeploy where a driver's equipment changed (cached plan scoped to old `EQ-1`, new composition binds the driver to `EQ-2`). Assert the driver child receives `DriverInstanceActor.TriggerRediscovery` after the drop. (Use the test harness's child-probe/TestProbe pattern already used in this file for asserting messages to driver children.)
**Step 2 — Verify it fails:** no re-trigger today.
**Step 3 — Implement:** in each drop branch (the two `Remove` sites), after removing the entry, `Tell` that driver's child actor `new DriverInstanceActor.TriggerRediscovery()` (guard: only if the child exists in `_children`). Update the inline comment: the previous "we deliberately do NOT add re-trigger logic" note becomes a description of the new re-trigger (discovery-only, idempotent, child no-ops if not `Connected`). If a driver maps to MULTIPLE cached equipment entries and only one drops, still send a single `TriggerRediscovery` (discovery re-resolves all of them) — de-dupe so a driver is told at most once per re-inject pass.
**Step 4 — Verify:** test passes; suite green; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): re-trigger discovery on config-unchanged rebind (follow-up C)"`
---
### Task 8: DriverHostActor — single SetDesiredSubscriptions per redeploy (follow-up D)
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** none (serial after Task 7)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`PushDesiredSubscriptions` bulk loop ~1204; re-inject tail interaction)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
**Context:** During an in-process redeploy a cached driver gets the bulk authored-only `SetDesiredSubscriptions` (line 1204) AND then the union from `ApplyDiscoveredPlan` (line 697) — one extra unsub/resub blip. Make it exactly one send per driver.
**Step 1 — Failing test:** redeploy with one driver that has a cached discovered plan; assert the driver child receives `SetDesiredSubscriptions` EXACTLY ONCE during the redeploy, and that the single payload is the authored ∪ discovered UNION. Add a second test: a driver whose cached plan is DROPPED in the re-inject tail (rebind) still receives exactly one `SetDesiredSubscriptions` carrying the AUTHORED-ONLY set (fallback) — its authored subscriptions must not be lost.
**Step 2 — Verify it fails:** today the cached-driver case sends twice.
**Step 3 — Implement:**
- In the bulk loop, SKIP the send for any `driverId` present in `_discoveredByDriver` (capture the key set BEFORE the re-inject tail runs).
- Re-inject tail: when a cached plan is APPLIED, `ApplyDiscoveredPlan` already sends the union (covers authored). When a cached plan is DROPPED (all entries for the driver removed → the driver no longer has any cached plan), send the authored-only `SetDesiredSubscriptions` for that driver as a fallback (mirror the bulk-loop payload: authored value refs + alarm refs, `SubscriptionPublishingInterval`).
- Ensure the invariant holds for drivers WITHOUT a cached plan (unchanged: single bulk send) and drivers added/removed by the reconcile.
**Step 4 — Verify:** both tests pass; the existing redeploy/restore tests stay green (watch for any test asserting the old double-send count); build 0 warnings.
**Step 5 — Commit:** `git commit -m "perf(otopcua): one SetDesiredSubscriptions per driver per redeploy (follow-up D)"`
---
### Task 9: DriverHostActor — multi-device-per-driver partition (follow-up E, part 2)
**Classification:** high-risk
**Estimated implement time:** ~5 min
**Parallelizable with:** none (serial after Task 8; needs Task 5's DeviceHost)
**Files:**
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`HandleDiscoveredNodes` >1-candidate branch from Task 6)
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
**Context:** Replace Task 6's ">1 candidate → warn+skip" with a real partition. Each candidate equipment has `EquipmentNode.DeviceHost` (from Task 5). The discovered nodes carry a device-host folder segment at `FolderPathSegments[1]` (FOCAS uses `device.HostAddress`). Partition nodes by that segment, normalize it the SAME way Task 5 normalized `DeviceHost`, and map each device's subset under the matching equipment via the existing `DiscoveredNodeMapper.Map` (a single-device subset → collapse kicks in per equipment → clean `EQ-n/FOCAS/Identity/...`).
**Step 1 — Failing tests:**
- Multi-device: driver `d1` resolves to `EQ-A{DeviceHost=h1}` and `EQ-B{DeviceHost=h2}`; discovered nodes split across folder segments `h1`/`h2`; assert `h1`'s subtree grafts under `EQ-A` and `h2`'s under `EQ-B`, each routing-keyed correctly, and `_discoveredByDriver["d1"]` has two entries.
- Unmatched device-host → warn-skip: a discovered segment `h3` with no matching equipment is NOT grafted (logged Warning), while `h1`/`h2` still graft.
- Degenerate: >1 candidate but NO `DeviceHost` data anywhere → falls back to warn+skip (no crash, no mis-graft).
**Step 2 — Verify it fails:** Task 6 left this as warn+skip.
**Step 3 — Implement:** in the >1-candidate branch, build `hostToEquipment = candidates.Where(e => e.DeviceHost != null).ToDictionary(e => Normalize(e.DeviceHost), e => e.EquipmentId)` (guard duplicate hosts → warn+skip the ambiguous host; detect duplicates BEFORE building the dictionary, because `ToDictionary` throws on a duplicate key). Partition `nodes` by `Normalize(FolderPathSegments.Count >= 2 ? FolderPathSegments[1] : null)`. For each partition with a matching equipment: compute that equipment's authoredRefs, `Map(equipmentId, partitionNodes, authoredRefs)`, collect into the inner `(equipmentId → plan)` map. Unmatched partitions → `_log.Warning` + skip. Cache the multi-entry inner map and apply every entry (Task 6 made apply per-entry). Use the SAME normalization helper from Task 5 (factor it so both call it).
**Step 4 — Verify:** all three tests pass; single-device + tag-less tests from Task 6 still green; `Runtime.Tests` + `OpcUaServer.Tests` + FOCAS suites green; build 0 warnings.
**Step 5 — Commit:** `git commit -m "feat(otopcua): multi-device-per-driver FixedTree partition (follow-up E)"`
---
### Task 10: Docs — update follow-up notes + design statuses
**Classification:** trivial
**Estimated implement time:** ~3 min
**Parallelizable with:** none (after Task 9)
**Files:**
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (the "Follow-ups surfaced during the review chain" section + the decisions-table multi-device row — mark A–E DONE, note the rebind re-trigger now exists)
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-followups-design.md` (Status → Implemented)
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-RESUME.md` (§3 — strike the now-closed follow-ups)
**Steps:** update the prose to reflect what shipped (each follow-up + the fact that E required no migration / no artifact change; the rebind re-trigger reversed the earlier `won't-fix`, cleanly). Commit: `git commit -m "docs(otopcua): record FixedTree follow-ups A-E as implemented"`
---
### Task 11: Build + full offline suite + regression gate
**Classification:** standard
**Estimated implement time:** ~4 min (mostly test wall-time)
**Parallelizable with:** none (final; after Tasks 2, 4, 9, 10)
**Files:** none (verification only)
**Steps:**
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → **0 errors, 0 warnings**.
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"` → all green.
3. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"` → all green.
4. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"` → all green (live-wire integration tests skip without the CNC — expected).
5. Confirm the validated single-device FOCAS injection path is unchanged (the relevant `DriverHostActorDiscoveryTests`/end-to-end test passes untouched). Report counts. Do NOT run a full-solution `dotnet test` (net48 Wonderware testhost can't run on macOS).
No commit (verification). Live wonder re-validation is optional + user-gated.
@@ -0,0 +1,23 @@
{
"planPath": "docs/plans/2026-06-26-otopcua-fixedtree-followups.md",
"tasks": [
{"id": 1, "subject": "Task 1: Injectable discovery timeout (A)", "status": "completed", "nativeId": 38, "commits": ["c2c368dc"]},
{"id": 2, "subject": "Task 2: RediscoverPolicy enum + driver overrides (B1)", "status": "completed", "nativeId": 39, "commits": ["a378b572", "efbdaf85"]},
{"id": 3, "subject": "Task 3: DriverInstanceActor honors policy (B2)", "status": "completed", "blockedBy": [1, 2], "nativeId": 40, "commits": ["ce34816a", "a1a655e6"]},
{"id": 4, "subject": "Task 4: TriggerRediscovery message + handler (C1)", "status": "completed", "blockedBy": [3], "nativeId": 41, "commits": ["f7358bf4", "e7d5ebe9"]},
{"id": 5, "subject": "Task 5: EquipmentNode DriverInstanceId/DeviceId/DeviceHost (E projection)", "status": "completed", "nativeId": 42, "commits": ["cb7ce7f1", "915492a7"]},
{"id": 6, "subject": "Task 6: DriverHostActor cache-as-dict + driver-level resolution (E1)", "status": "completed", "blockedBy": [5], "nativeId": 43, "commits": ["adcd7b57"]},
{"id": 7, "subject": "Task 7: Re-trigger discovery on rebind drop (C2)", "status": "completed", "blockedBy": [4, 6], "nativeId": 44, "commits": ["53367148", "cde16063"]},
{"id": 8, "subject": "Task 8: Single SetDesiredSubscriptions per redeploy (D)", "status": "completed", "blockedBy": [7], "nativeId": 45, "commits": ["05c82079", "51721df5"]},
{"id": 9, "subject": "Task 9: Multi-device-per-driver partition (E2)", "status": "completed", "blockedBy": [5, 8], "nativeId": 46, "commits": ["50f08635", "0074f37a"]},
{"id": 10, "subject": "Task 10: Docs — follow-up notes + statuses", "status": "completed", "blockedBy": [9], "nativeId": 47},
{"id": 11, "subject": "Task 11: Build + offline suite + regression gate", "status": "pending", "blockedBy": [2, 4, 9, 10], "nativeId": 48}
],
"nativeTaskIds": {
"1": 38, "2": 39, "3": 40, "4": 41, "5": 42, "6": 43,
"7": 44, "8": 45, "9": 46, "10": 47, "11": 48
},
"lastUpdated": "2026-06-26T00:00:00Z",
"status": "code+docs complete; final build/suite gate pending",
"branch": "feat/focas-fixedtree-equipment-injection"
}
@@ -7,6 +7,12 @@
# Any divergence from these defaults must be deliberate and recorded in docs/v2/Architecture.md.
akka {
# Akka logger wiring (route ILoggingAdapter → Serilog) is configured via Akka.Hosting's
# ConfigureLoggers in ServiceCollectionExtensions.WithOtOpcUaClusterBootstrap — HOCON
# `akka.loggers` alone is not honored by Akka.Hosting. logger-startup-timeout is kept here
# since the Serilog logger can be slow to initialize at startup.
logger-startup-timeout = 30s
extensions = [
"Akka.Cluster.Tools.PublishSubscribe.DistributedPubSubExtensionProvider, Akka.Cluster.Tools"
]
@@ -1,5 +1,7 @@
using Akka.Cluster.Hosting;
using Akka.Event;
using Akka.Hosting;
using Akka.Logger.Serilog;
using Akka.Remote.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -53,6 +55,19 @@ public static class ServiceCollectionExtensions
builder.AddHocon(HoconLoader.LoadBaseConfig(), HoconAddMode.Append);
// Route Akka's internal ILoggingAdapter (DriverHostActor, DriverInstanceActor, cluster
// events, …) into Serilog so those logs reach the same sinks as the MEL/Serilog application
// logs. Akka.Hosting owns logger setup, so HOCON `akka.loggers` alone is not honored — the
// logger must be registered through ConfigureLoggers. Without this the actor graph logs only
// to the default StandardOutLogger (discarded under the Windows service host), which is why
// the driver-role actors were invisible during the 2026-06 data-plane investigation.
builder.ConfigureLoggers(setup =>
{
setup.LogLevel = LogLevel.DebugLevel;
setup.ClearLoggers();
setup.AddLogger<SerilogLogger>();
});
builder.WithRemoting(new RemoteOptions
{
HostName = options.Hostname,
@@ -10,6 +10,7 @@
<PackageReference Include="Akka.Cluster"/>
<PackageReference Include="Akka.Cluster.Hosting"/>
<PackageReference Include="Akka.Cluster.Tools"/>
<PackageReference Include="Akka.Logger.Serilog"/>
<PackageReference Include="Akka.Remote.Hosting"/>
<PackageReference Include="Microsoft.Extensions.Hosting"/>
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions"/>
@@ -70,6 +70,10 @@ public sealed class DeferredAddressSpaceSink : IOpcUaAddressSpaceSink, ISurgical
/// <summary>Rebuilds the address space through the inner sink.</summary>
public void RebuildAddressSpace() => _inner.RebuildAddressSpace();
/// <summary>Announces a runtime NodeAdded model-change (discovered-node injection) through the inner sink.</summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId) => _inner.RaiseNodesAddedModelChange(affectedNodeId);
/// <summary>Forwards an in-place tag-attribute update (F10b) to the inner sink when it supports the
/// surgical capability. Returns false otherwise — before the real <c>SdkAddressSpaceSink</c> is
/// swapped in (inner is still the null sink), or any inner sink that isn't surgical — so the caller
@@ -84,6 +84,14 @@ public interface IOpcUaAddressSpaceSink
/// successful deployment apply so the node manager reflects the new config. Idempotent.
/// </summary>
void RebuildAddressSpace();
/// <summary>
/// Announce that nodes were added at runtime (discovered-node injection) under
/// <paramref name="affectedNodeId"/> so subscribed clients refresh their browse
/// (Part 3 GeneralModelChangeEvent, verb NodeAdded).
/// </summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
void RaiseNodesAddedModelChange(string affectedNodeId);
}
/// <summary>OPC UA status code projection — Good / Uncertain / Bad. Real SDK has finer-grained
@@ -114,4 +122,7 @@ public sealed class NullOpcUaAddressSpaceSink : IOpcUaAddressSpaceSink
/// <inheritdoc />
public void RebuildAddressSpace() { }
/// <inheritdoc />
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
@@ -1,5 +1,17 @@
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// <summary>How aggressively the host re-runs post-connect discovery for this driver.</summary>
/// <remarks>
/// No explicit values are assigned, so <see cref="UntilStable"/> is the first member (underlying
/// value 0) and is therefore also the value of <c>default(DiscoveryRediscoverPolicy)</c>.
/// </remarks>
public enum DiscoveryRediscoverPolicy
{
/// <summary>Retry every interval up to the cap or until the captured set is non-empty and stable
/// (for drivers whose discovered shape fills in asynchronously after connect, e.g. the FOCAS FixedTree).</summary>
UntilStable,
/// <summary>Run exactly one discovery pass on connect (drivers that discover synchronously in DiscoverAsync,
/// i.e. the complete node set is emitted within that single call).</summary>
Once,
/// <summary>Never run post-connect discovery.</summary>
Never,
}
/// <summary>
/// Driver capability for discovering tags and hierarchy from the backend.
/// Streams discovered nodes into <see cref="IAddressSpaceBuilder"/> rather than
@@ -14,4 +26,7 @@ public interface ITagDiscovery
/// <param name="builder">The address space builder to stream discovered nodes into.</param>
/// <param name="cancellationToken">A cancellation token for the discovery operation.</param>
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
/// <summary>Post-connect re-discovery policy. Default preserves the original retry-until-stable behavior.</summary>
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
}
@@ -998,6 +998,14 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
// ---- ITagDiscovery ----
/// <summary>
/// Run-once: <see cref="DiscoverAsync"/> emits pre-declared tags and (when
/// EnableControllerBrowse is set) fully awaits the @tags symbol-table walk + UDT-shape
/// resolution within the single call, streaming the complete node set in one pass —
/// nothing fills in asynchronously after connect, so a single discovery pass is sufficient.
/// </summary>
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;
/// <summary>
/// Stream the driver's tag set into the builder. Pre-declared tags from
/// <see cref="AbCipDriverOptions.Tags"/> emit first; optionally, the
@@ -422,6 +422,13 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
// ---- ITagDiscovery ----
/// <summary>
/// Run-once: <see cref="DiscoverAsync"/> emits the complete node set synchronously from
/// the configured device/tag tables within a single pass — there is no shape that fills
/// in asynchronously after connect, so a single discovery pass is sufficient.
/// </summary>
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;
/// <summary>
/// Discovers tags and populates the address space asynchronously.
/// </summary>
@@ -306,7 +306,16 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
Volatile.Read(ref _health).LastSuccessfulRead,
$"FOCAS status 0x{status:X8} reading {reference}"));
}
catch (OperationCanceledException) { throw; }
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
catch (OperationCanceledException)
{
// Per-call timeout (not external cancellation) — the read stalled past the device
// Timeout budget. Surface a recoverable comm error so the BadWaitingForInitialData
// seed is overwritten and health degrades, instead of the read hanging forever.
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS read timed out for {reference}"));
}
catch (Exception ex)
{
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
@@ -356,7 +365,15 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
var status = await client.WriteAsync(parsed, def.DataType, w.Value, cancellationToken).ConfigureAwait(false);
results[i] = new WriteResult(status);
}
catch (OperationCanceledException) { throw; }
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
catch (OperationCanceledException)
{
// Per-call timeout (not external cancellation) — the write stalled past the device
// Timeout budget. Surface a recoverable comm error rather than aborting the batch.
results[i] = new WriteResult(FocasStatusMapper.BadCommunicationError);
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS write timed out for {w.FullReference}"));
}
catch (NotSupportedException nse)
{
results[i] = new WriteResult(FocasStatusMapper.BadNotSupported);
@@ -384,6 +401,14 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
// ---- ITagDiscovery ----
/// <summary>
/// Retry-until-stable: the FixedTree subtree is filled in asynchronously by
/// <see cref="FixedTreeLoopAsync"/> a couple of seconds AFTER connect, so the first
/// post-connect <see cref="DiscoverAsync"/> pass would miss it — the host must re-run
/// discovery until the captured node set is non-empty and stable.
/// </summary>
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
/// <summary>Discovers tags and builds the OPC UA address space asynchronously.</summary>
/// <param name="builder">The address space builder for constructing the OPC UA namespace.</param>
/// <param name="cancellationToken">Cancellation token for the operation.</param>
@@ -1113,7 +1138,11 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
device.Client = null;
}
device.Client = _clientFactory.Create();
// Wrap the raw wire client so every operation on the device's single FOCAS/2 socket is
// serialized (request→response on one socket cannot interleave) and time-bounded. Without
// this, the equipment poll, fixed-tree loop, probe, and recycle loop collide on the shared
// socket and a stalled read blocks forever — leaving bound tags at BadWaitingForInitialData.
device.Client = new SynchronizedFocasClient(_clientFactory.Create(), _options.Timeout);
try
{
await device.Client.ConnectAsync(device.ParsedAddress, _options.Timeout, ct).ConfigureAwait(false);
@@ -195,12 +195,41 @@ public static class FocasDriverFactoryExtensions
AllowTrailingCommas = true,
};
/// <summary>
/// String converter that also accepts a JSON <b>number</b> token and hands it back as text.
/// The AdminUI persists the FOCAS <c>Series</c> enum as its integer value (e.g. <c>"series":6</c>),
/// whereas this DTO models <c>Series</c> as a string that is later passed to
/// <see cref="ParseSeries"/> (Enum.TryParse accepts the numeric form). Without this converter,
/// System.Text.Json throws "Cannot get the value of a token type 'Number' as a string" on the
/// bare number and the driver falls back to a stub. Accepted tokens: string / number / null;
/// the result is always a string (or null).
/// </summary>
internal sealed class FlexibleStringConverter : JsonConverter<string?>
{
    /// <summary>Reads a string, number, or null token and returns it as a (nullable) string.</summary>
    /// <param name="reader">Reader positioned on the token to convert.</param>
    /// <param name="typeToConvert">The target type (unused).</param>
    /// <param name="options">Serializer options (unused).</param>
    /// <returns>The string value, the invariant-culture text of the number, or null.</returns>
    /// <exception cref="JsonException">The token is not a string, number, or null.</exception>
    public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.String:
                return reader.GetString();

            case JsonTokenType.Null:
                return null;

            case JsonTokenType.Number:
            {
                // Integral values are rendered without a fractional part so that enum-style
                // numbers ("6") round-trip cleanly; anything else goes through double.
                if (reader.TryGetInt64(out var integral))
                {
                    return integral.ToString(System.Globalization.CultureInfo.InvariantCulture);
                }

                return reader.GetDouble().ToString(System.Globalization.CultureInfo.InvariantCulture);
            }

            default:
                throw new JsonException($"Expected string, number, or null but got {reader.TokenType}.");
        }
    }

    /// <summary>Writes the value as a JSON string, or as JSON null when it is null.</summary>
    /// <param name="writer">The writer to emit to.</param>
    /// <param name="value">The value to write.</param>
    /// <param name="options">Serializer options (unused).</param>
    public override void Write(Utf8JsonWriter writer, string? value, JsonSerializerOptions options)
    {
        if (value is not null)
        {
            writer.WriteStringValue(value);
            return;
        }

        writer.WriteNullValue();
    }
}
internal sealed class FocasDriverConfigDto
{
/// <summary>Gets or sets the FOCAS client factory backend name (e.g. "wire" or "stub").</summary>
public string? Backend { get; init; }
/// <summary>Gets or sets the CNC series for this driver.</summary>
[JsonConverter(typeof(FlexibleStringConverter))]
public string? Series { get; init; }
/// <summary>Gets or sets the operation timeout in milliseconds.</summary>
@@ -234,6 +263,7 @@ public static class FocasDriverFactoryExtensions
public string? DeviceName { get; init; }
/// <summary>Gets or sets the CNC series for this device (overrides top-level series if provided).</summary>
[JsonConverter(typeof(FlexibleStringConverter))]
public string? Series { get; init; }
/// <summary>
@@ -21,9 +21,19 @@ public sealed record FocasHostAddress(string Host, int Port)
{
if (string.IsNullOrWhiteSpace(value)) return null;
const string prefix = "focas://";
if (!value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) return null;
var body = value[prefix.Length..];
// Canonical form is focas://{ip}[:{port}], but the AdminUI persists the device host as a
// scheme-less "{ip}[:{port}]" (e.g. "10.201.31.5:8193"). Accept that too: take the body
// after focas:// when present, else the whole value when it carries NO other URI scheme
// (a "://" that isn't ours — e.g. http:// — is still rejected). The host-contains-colon
// guard below then rejects malformed scheme typos like "focas:10.0.0.5:8193".
string body;
if (value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
body = value[prefix.Length..];
else if (!value.Contains("://", StringComparison.Ordinal))
body = value;
else
return null;
if (string.IsNullOrEmpty(body)) return null;
var colonIdx = body.LastIndexOf(':');
@@ -39,7 +49,9 @@ public sealed record FocasHostAddress(string Host, int Port)
{
host = body;
}
if (string.IsNullOrEmpty(host)) return null;
// Empty host, or a host still carrying a colon (e.g. the malformed "focas:10.0.0.5" left
// when someone wrote "focas:10.0.0.5:8193" without the //), is invalid.
if (string.IsNullOrEmpty(host) || host.Contains(':', StringComparison.Ordinal)) return null;
return new FocasHostAddress(host, port);
}
}
@@ -0,0 +1,152 @@
namespace ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
/// <summary>
/// An <see cref="IFocasClient"/> decorator that funnels every wire operation on a device's single
/// FOCAS/2 socket through one gate — so operations run <b>one at a time</b> — and puts a
/// <b>deadline</b> on each data call.
/// </summary>
/// <remarks>
/// <para><b>Why serialize.</b> FOCAS/2 over TCP:8193 is a strict request→response protocol on ONE
/// socket. The driver keeps a single <see cref="IFocasClient"/> per device, yet several independent
/// loops read from it concurrently: the equipment poll (<see cref="FocasDriver.ReadAsync"/>), the
/// fixed-tree loop (<c>FixedTreeLoopAsync</c>), the connectivity probe, and the recycle loop. If two
/// of them interleave their <c>send(request); read(response)</c> pairs on that socket, one reader
/// swallows the other's response PDU and the victim blocks forever on bytes that never arrive —
/// the bound OPC UA node then stays at <c>BadWaitingForInitialData</c>. That was the root cause of
/// FOCAS equipment tags never surfacing a value while the probe reported HEALTHY (probe reads work
/// single-threaded on a dev box but collide once the fixed-tree loop runs concurrently in a
/// deployment).</para>
///
/// <para><b>Why bound.</b> A <see cref="SemaphoreSlim"/> of count 1 makes each request→response
/// atomic on the socket. The per-call timeout guarantees a stalled response cannot keep the gate —
/// and with it the socket — indefinitely: a hung read becomes a recoverable error after the
/// configured <c>Timeout</c> budget rather than permanent silence. The two are paired on purpose; a
/// lock around an <i>unbounded</i> read would deadlock all I/O for the device.</para>
///
/// <para><b>Exceptions to the bound.</b> <see cref="ConnectAsync"/> and <see cref="ProbeAsync"/> go
/// through the gate but are NOT subject to this decorator's call timeout. They already carry their
/// own budgets (the connect timeout argument and the probe's caller-supplied linked token
/// respectively), and bounding them a second time would shrink those budgets.</para>
/// </remarks>
public sealed class SynchronizedFocasClient : IFocasClient
{
    private readonly IFocasClient _inner;
    private readonly TimeSpan _callTimeout;
    private readonly SemaphoreSlim _gate = new(1, 1);

    /// <summary>Creates the decorator around <paramref name="inner"/>.</summary>
    /// <param name="inner">The FOCAS client whose operations are serialized.</param>
    /// <param name="callTimeout">
    /// Deadline for each data read/write. A value of <see cref="TimeSpan.Zero"/> or less switches the
    /// per-call timeout off; the caller's own cancellation token is still honoured.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is <c>null</c>.</exception>
    public SynchronizedFocasClient(IFocasClient inner, TimeSpan callTimeout)
    {
        if (inner is null)
        {
            throw new ArgumentNullException(nameof(inner));
        }

        _inner = inner;
        _callTimeout = callTimeout;
    }

    /// <inheritdoc />
    public bool IsConnected
    {
        get { return _inner.IsConnected; }
    }

    /// <inheritdoc />
    public Task ConnectAsync(FocasHostAddress address, TimeSpan timeout, CancellationToken cancellationToken)
    {
        return RunGatedAsync(token => _inner.ConnectAsync(address, timeout, token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<bool> ProbeAsync(CancellationToken cancellationToken)
    {
        return RunGatedAsync(token => _inner.ProbeAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<(object? value, uint status)> ReadAsync(
        FocasAddress address, FocasDataType type, CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.ReadAsync(address, type, token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<uint> WriteAsync(
        FocasAddress address, FocasDataType type, object? value, CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.WriteAsync(address, type, value, token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasActiveAlarm>> ReadAlarmsAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.ReadAlarmsAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<FocasSysInfo> GetSysInfoAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetSysInfoAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasAxisName>> GetAxisNamesAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetAxisNamesAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasSpindleName>> GetSpindleNamesAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetSpindleNamesAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<FocasDynamicSnapshot> ReadDynamicAsync(int axisIndex, CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.ReadDynamicAsync(axisIndex, token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<FocasProgramInfo> GetProgramInfoAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetProgramInfoAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<FocasTimer> GetTimerAsync(FocasTimerKind kind, CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetTimerAsync(kind, token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasServoLoad>> GetServoLoadsAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetServoLoadsAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleLoadsAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetSpindleLoadsAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleMaxRpmsAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetSpindleMaxRpmsAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetPositionFiguresAsync(CancellationToken cancellationToken)
    {
        return RunBoundedAsync(token => _inner.GetPositionFiguresAsync(token), cancellationToken);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Wrapped client first, then the gate that guarded it.
        _inner.Dispose();
        _gate.Dispose();
    }

    // Serialization only: the operation's budget is owned by the caller (the connect timeout
    // argument, or the linked token the probe's caller passes in).
    private async Task<T> RunGatedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            return await op(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    // Result-less twin of the generic overload above; same gate, same absence of a deadline.
    private async Task RunGatedAsync(Func<CancellationToken, Task> op, CancellationToken ct)
    {
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            await op(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    // Serialization plus the per-call deadline. When the deadline fires, the operation observes
    // cancellation on the LINKED token rather than on the caller's, so a caller tells a timeout
    // apart from genuine cancellation by checking IsCancellationRequested on its own token.
    private async Task<T> RunBoundedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            if (_callTimeout > TimeSpan.Zero)
            {
                using var deadline = CancellationTokenSource.CreateLinkedTokenSource(ct);
                deadline.CancelAfter(_callTimeout);
                return await op(deadline.Token).ConfigureAwait(false);
            }

            // Timeout disabled: only the caller's token can cancel the operation.
            return await op(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }
}
@@ -584,6 +584,15 @@ public sealed class GalaxyDriver
// ===== ITagDiscovery (PR 4.1) =====
/// <summary>Discovery runs a single time for this driver.</summary>
/// <remarks>
/// <see cref="DiscoverAsync"/> fetches the full Galaxy hierarchy inline and streams the complete
/// node set inside one awaited call; there is no FOCAS-style background cache that fills in after
/// connect. Because Galaxy is a heavy network driver, the bounded post-connect retry loop is
/// deliberately avoided. Re-discovery on a Galaxy redeploy is handled separately, via
/// <see cref="IRediscoverable"/> plus the deploy-event watcher.
/// </remarks>
public DiscoveryRediscoverPolicy RediscoverPolicy
{
    get { return DiscoveryRediscoverPolicy.Once; }
}
/// <inheritdoc />
public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
{
@@ -261,6 +261,13 @@ public sealed class ModbusDriver
// ---- ITagDiscovery ----
/// <summary>Discovery runs a single time for this driver.</summary>
/// <remarks>
/// <see cref="DiscoverAsync"/> produces the complete node set synchronously from the configured
/// tag table in one pass. Nothing is filled in asynchronously after connect, so one discovery pass
/// is enough.
/// </remarks>
public DiscoveryRediscoverPolicy RediscoverPolicy
{
    get { return DiscoveryRediscoverPolicy.Once; }
}
/// <summary>Discovers tags and builds the OPC UA address space.</summary>
/// <param name="builder">Address space builder.</param>
/// <param name="cancellationToken">Cancellation token.</param>
@@ -826,6 +826,14 @@ public sealed class OpcUaClientDriver : IDriver, ITagDiscovery, IReadable, IWrit
// ---- ITagDiscovery ----
/// <summary>Discovery runs a single time for this driver.</summary>
/// <remarks>
/// <see cref="DiscoverAsync"/> recursively browses the remote server's address space and registers
/// every variable within that one call (the browse and enrich passes are fully awaited). Nothing is
/// filled in asynchronously after connect, so one discovery pass is enough.
/// </remarks>
public DiscoveryRediscoverPolicy RediscoverPolicy
{
    get { return DiscoveryRediscoverPolicy.Once; }
}
/// <summary>Discovers the remote OPC UA server's address space and materializes it through the supplied builder.</summary>
/// <param name="builder">Address space builder for materializing discovered nodes.</param>
/// <param name="cancellationToken">Cancellation token for the operation.</param>
@@ -1133,6 +1133,13 @@ public sealed class S7Driver
// ---- ITagDiscovery ----
/// <summary>Discovery runs a single time for this driver.</summary>
/// <remarks>
/// <see cref="DiscoverAsync"/> produces the complete node set synchronously from the configured
/// tag table in one pass. Nothing is filled in asynchronously after connect, so one discovery pass
/// is enough.
/// </remarks>
public DiscoveryRediscoverPolicy RediscoverPolicy
{
    get { return DiscoveryRediscoverPolicy.Once; }
}
/// <summary>Discovers tags and builds the OPC UA address space.</summary>
/// <param name="builder">Address space builder.</param>
/// <param name="cancellationToken">Cancellation token.</param>
@@ -377,6 +377,14 @@ public sealed class TwinCATDriver : IDriver, IReadable, IWritable, ITagDiscovery
// ---- ITagDiscovery ----
/// <summary>Discovery runs a single time for this driver.</summary>
/// <remarks>
/// <see cref="DiscoverAsync"/> emits the pre-declared tags and, when EnableControllerBrowse is set,
/// fully awaits the controller symbol browse inside the same call, so the complete node set is
/// streamed in one pass. Nothing is filled in asynchronously after connect, so one discovery pass
/// is enough.
/// </remarks>
public DiscoveryRediscoverPolicy RediscoverPolicy
{
    get { return DiscoveryRediscoverPolicy.Once; }
}
/// <summary>Discovers devices and tags from ADS configuration and optionally controller symbols.</summary>
/// <param name="builder">Address space builder for adding discovered nodes.</param>
/// <param name="cancellationToken">Cancellation token.</param>
@@ -228,6 +228,14 @@ builder.Services.AddOtOpcUaHealth();
builder.Services.AddOtOpcUaObservability(builder.Configuration);
var app = builder.Build();
// Bridge the DI-registered Serilog logger to the static Serilog.Log.Logger.
// AddZbSerilog only registers Serilog as the MEL logging provider; it does NOT assign the static
// logger. Assigning it here from the DI root logger is needed for two reasons:
//   1. static Log.* calls (such as the startup banner below) actually emit;
//   2. Akka.Logger.Serilog's SerilogLogger writes to Log.Logger, so this is what routes the actor
//      graph's logs (DriverHostActor et al.) to the configured sinks.
// ORDERING: this must happen before app.RunAsync() starts the ActorSystem, because the Akka logger
// captures Log.Logger at system start.
Serilog.Log.Logger = app.Services.GetRequiredService<Serilog.ILogger>();
app.UseSerilogRequestLogging();
// Razor class library static assets (_content/<libname>/...) are served via endpoint
@@ -303,6 +303,46 @@ public sealed class AddressSpaceApplier
composition.EquipmentTags.Select(t => t.EquipmentId).Distinct(StringComparer.Ordinal).Count());
}
/// <summary>
/// Adds driver-discovered (FixedTree) folders and variables beneath an equipment node at runtime,
/// then announces a NodeAdded model-change under that equipment so connected clients can refresh.
/// </summary>
/// <remarks>
/// Idempotent and cheap to repeat: the sink's EnsureFolder/EnsureVariable return early for nodes
/// that already exist, so the call can safely be re-run after every address-space rebuild. Folders
/// are ensured shallowest-first so a parent always exists before its children. A variable stays
/// writable only when it is flagged writable AND is not an array — arrays are forced read-only,
/// the same rule the equipment-tag pass applies. When both lists are empty nothing is ensured and
/// no model-change is raised.
/// </remarks>
/// <param name="equipmentRootNodeId">Equipment root node that the discovered nodes hang under; the
/// NodeAdded model-change is announced under this node.</param>
/// <param name="folders">Discovered folders to ensure (processed parent-first, by depth).</param>
/// <param name="variables">Discovered variables to ensure.</param>
public void MaterialiseDiscoveredNodes(
    string equipmentRootNodeId,
    IReadOnlyList<DiscoveredFolder> folders,
    IReadOnlyList<DiscoveredVariable> variables)
{
    ArgumentException.ThrowIfNullOrEmpty(equipmentRootNodeId);
    ArgumentNullException.ThrowIfNull(folders);
    ArgumentNullException.ThrowIfNull(variables);

    var nothingDiscovered = folders.Count == 0 && variables.Count == 0;
    if (nothingDiscovered)
    {
        return;
    }

    // The number of '/' separators in a NodeId is its depth; ascending depth == parent-first.
    var shallowestFirst = folders.OrderBy(folder => folder.NodeId.Count(ch => ch == '/'));
    foreach (var folder in shallowestFirst)
    {
        SafeEnsureFolder(folder.NodeId, folder.ParentNodeId, folder.DisplayName);
    }

    foreach (var discovered in variables)
    {
        // Same rule as MaterialiseEquipmentTags: the driver write path can't handle arrays, so an
        // array is never exposed as writable.
        var allowWrite = discovered.Writable && !discovered.IsArray;
        SafeEnsureVariable(
            discovered.NodeId,
            discovered.ParentNodeId,
            discovered.DisplayName,
            discovered.DataType,
            allowWrite,
            historianTagname: null,
            isArray: discovered.IsArray,
            arrayLength: discovered.ArrayLength);
    }

    _sink.RaiseNodesAddedModelChange(equipmentRootNodeId);

    _logger.LogInformation(
        "AddressSpaceApplier: discovered nodes materialised under {Equipment} (folders={Folders}, vars={Vars})",
        equipmentRootNodeId, folders.Count, variables.Count);
}
/// <summary>
/// Materialise Equipment-namespace VirtualTags from a composition snapshot — the VirtualTag
/// analogue of <see cref="MaterialiseEquipmentTags"/>. For each <see cref="EquipmentVirtualTagPlan"/>,
@@ -58,7 +58,38 @@ public sealed record AddressSpaceComposition(
/// <summary>Projection of one UNS area: its id and the display name used for it (the composer
/// fills <see cref="DisplayName"/> from the area's <c>Name</c>).</summary>
public sealed record UnsAreaProjection(string UnsAreaId, string DisplayName);
/// <summary>Projection of one UNS line: its id, a UNS area id, and a display name.
/// NOTE(review): <see cref="UnsAreaId"/> is presumably the parent area of the line — confirm.</summary>
public sealed record UnsLineProjection(string UnsLineId, string UnsAreaId, string DisplayName);
public sealed record EquipmentNode(string EquipmentId, string DisplayName, string UnsLineId);
/// <summary>One UNS level-5 equipment folder in the address space, together with the equipment's
/// optional driver/device bindings.</summary>
/// <remarks>
/// <para><b>Bindings.</b> <see cref="DriverInstanceId"/> / <see cref="DeviceId"/> carry the equipment's
/// optional bindings (both <c>null</c> ⇒ driver-less / no device), copied straight from the
/// <c>Equipment</c> row. <see cref="DeviceHost"/> is the device's connection host (e.g.
/// <c>"10.0.0.5:8193"</c>) resolved from the bound <c>Device</c>'s schemaless <c>DeviceConfig</c> JSON via
/// <see cref="AddressSpaceComposer.TryExtractDeviceHost"/> — <c>null</c> when there is no device, no
/// <c>HostAddress</c> in its config, or the host cannot be parsed. These three let a later task graft a
/// driver's discovered FixedTree onto an equipment that has zero authored tags, and partition a
/// multi-device driver by host.</para>
/// <para><b>Normalization.</b> The host value is normalized identically on both the live-edit composer
/// and the artifact-decode sides (single source of truth:
/// <see cref="AddressSpaceComposer.TryExtractDeviceHost"/>); the later partition task MUST normalize the
/// driver-discovered device-host folder segment the same way (trim + lower-case) so the two compare
/// equal.</para>
/// <para><b>Address-space-rebuild interaction (accepted trade-off).</b> These three fields participate in
/// <see cref="EquipmentNode"/>'s record value-equality, which <c>AddressSpacePlan.Compute</c> uses to
/// build its changed-equipment set. So editing a <c>Device</c>'s <c>DeviceConfig</c> host/port, or
/// rebinding an equipment's <c>DriverInstanceId</c> / <c>DeviceId</c>, now yields an
/// <see cref="EquipmentNode"/> delta that triggers a full structural address-space rebuild on the next
/// deploy (a momentary subscription teardown for that equipment). This is a deliberate, accepted
/// decision: it fires only on rare operator-initiated config edits at deploy time (routine redeploys of
/// unchanged config are unaffected — the delta is empty), it is recoverable, and it is directionally
/// correct for the multi-device FixedTree re-partition (a later task). <c>AddressSpacePlan</c> is left
/// unchanged.</para>
/// </remarks>
/// <param name="EquipmentId">The logical NodeId of the equipment folder.</param>
/// <param name="DisplayName">The friendly UNS Name segment shown for the folder.</param>
/// <param name="UnsLineId">The parent line the folder hangs under.</param>
/// <param name="DriverInstanceId">Optional driver binding from the <c>Equipment</c> row; <c>null</c> when driver-less.</param>
/// <param name="DeviceId">Optional device binding from the <c>Equipment</c> row; <c>null</c> when there is no device.</param>
/// <param name="DeviceHost">Normalized connection host of the bound device, or <c>null</c> when it
/// cannot be resolved.</param>
public sealed record EquipmentNode(
string EquipmentId,
string DisplayName,
string UnsLineId,
string? DriverInstanceId = null,
string? DeviceId = null,
string? DeviceHost = null);
/// <summary>Plan entry for one driver instance: its id, its driver type name, and its configuration
/// as a JSON string. NOTE(review): field meanings are taken from their names only — confirm against
/// the composer that builds these.</summary>
public sealed record DriverInstancePlan(string DriverInstanceId, string DriverType, string ConfigJson);
/// <summary>Plan entry for one scripted alarm: its id, an equipment id, a predicate script id, and a
/// message template. NOTE(review): field meanings are taken from their names only — confirm against
/// the composer that builds these.</summary>
public sealed record ScriptedAlarmPlan(string ScriptedAlarmId, string EquipmentId, string PredicateScriptId, string MessageTemplate);
@@ -277,15 +308,17 @@ public static class AddressSpaceComposer
/// <param name="equipment">The equipment.</param>
/// <param name="driverInstances">The driver instances.</param>
/// <param name="scriptedAlarms">The scripted alarms.</param>
/// <param name="devices">The per-device rows used to resolve each equipment's <c>DeviceHost</c>. <c>null</c> = none.</param>
/// <returns>The composition result.</returns>
public static AddressSpaceComposition Compose(
IReadOnlyList<UnsArea> unsAreas,
IReadOnlyList<UnsLine> unsLines,
IReadOnlyList<Equipment> equipment,
IReadOnlyList<DriverInstance> driverInstances,
IReadOnlyList<ScriptedAlarm> scriptedAlarms) =>
IReadOnlyList<ScriptedAlarm> scriptedAlarms,
IReadOnlyList<Device>? devices = null) =>
Compose(unsAreas, unsLines, equipment, driverInstances, scriptedAlarms,
Array.Empty<Tag>(), Array.Empty<Namespace>());
Array.Empty<Tag>(), Array.Empty<Namespace>(), devices: devices);
/// <summary>
/// Composes the address space build plan from the configuration entities.
@@ -299,6 +332,8 @@ public static class AddressSpaceComposer
/// <param name="namespaces">The namespaces.</param>
/// <param name="virtualTags">The Equipment-namespace virtual (calculated) tags. <c>null</c> = none.</param>
/// <param name="scripts">The scripts joined to <paramref name="virtualTags"/> by ScriptId for the expression. <c>null</c> = none.</param>
/// <param name="devices">The per-device rows (<c>DeviceId</c> + schemaless <c>DeviceConfig</c> JSON) used to resolve
/// each equipment's <c>DeviceHost</c> from its bound <c>DeviceId</c>. <c>null</c> = none.</param>
/// <returns>The composition result.</returns>
public static AddressSpaceComposition Compose(
IReadOnlyList<UnsArea> unsAreas,
@@ -309,10 +344,25 @@ public static class AddressSpaceComposer
IReadOnlyList<Tag> tags,
IReadOnlyList<Namespace> namespaces,
IReadOnlyList<VirtualTag>? virtualTags = null,
IReadOnlyList<Script>? scripts = null)
IReadOnlyList<Script>? scripts = null,
IReadOnlyList<Device>? devices = null)
{
var vtags = virtualTags ?? Array.Empty<VirtualTag>();
var resolvedScripts = scripts ?? Array.Empty<Script>();
// DeviceId → connection host, resolved once from each bound Device's schemaless DeviceConfig JSON
// via the shared TryExtractDeviceHost (single source of truth + normalization for both this
// composer and the artifact-decode mirror in DeploymentArtifact, so EquipmentNode.DeviceHost is
// byte-parity-equal). This MUST match DeploymentArtifact.BuildDeviceHostMap semantics EXACTLY:
// Ordinal comparer, skip blank/whitespace DeviceIds, and LAST-WINS on a duplicate DeviceId (a
// foreach assignment, NOT ToDictionary which would THROW on a dupe — diverging from the decode
// side's last-wins). DeviceId is DB-unique so a dupe is defensive-only.
var deviceHostById = new Dictionary<string, string?>(StringComparer.Ordinal);
foreach (var d in devices ?? Array.Empty<Device>())
{
if (string.IsNullOrWhiteSpace(d.DeviceId)) continue;
deviceHostById[d.DeviceId] = TryExtractDeviceHost(d.DeviceConfig);
}
var areas = unsAreas
.OrderBy(a => a.UnsAreaId, StringComparer.Ordinal)
.Select(a => new UnsAreaProjection(a.UnsAreaId, a.Name))
@@ -328,7 +378,15 @@ public static class AddressSpaceComposer
// DisplayName = the UNS level-5 Name segment (friendly browse name, matching the Area
// and Line projections + EquipmentNodeWalker) — NOT the colloquial MachineCode. NodeId
// stays the logical EquipmentId so browse-path resolution + ACLs are unaffected.
.Select(e => new EquipmentNode(e.EquipmentId, e.Name, e.UnsLineId))
// DriverInstanceId / DeviceId are copied straight from the row; DeviceHost resolves from the
// bound device's config (null when there's no device or no parseable HostAddress).
.Select(e => new EquipmentNode(
e.EquipmentId,
e.Name,
e.UnsLineId,
DriverInstanceId: e.DriverInstanceId,
DeviceId: e.DeviceId,
DeviceHost: e.DeviceId is null ? null : deviceHostById.GetValueOrDefault(e.DeviceId)))
.ToList();
var plans = driverInstances
@@ -493,6 +551,49 @@ public static class AddressSpaceComposer
return tagConfig;
}
/// <summary>
/// Reads a <see cref="Device"/>'s connection host out of its schemaless <c>DeviceConfig</c> JSON.
/// The host is the top-level <c>"HostAddress"</c> string (e.g. <c>"10.201.31.5:8193"</c>) — the
/// same value a FOCAS driver emits as its discovered device-host folder segment.
/// </summary>
/// <remarks>
/// <para>Yields <c>null</c> — and never throws — when the config is blank, is not valid JSON, is
/// not a JSON object, has no string-valued <c>HostAddress</c>, or that value is blank/whitespace.</para>
/// <para>The result is deterministically normalized (trimmed and lower-cased) so the live-edit
/// composer side and the artifact-decode side (<c>DeploymentArtifact</c>) agree byte-for-byte.
/// This method is the SINGLE SOURCE OF TRUTH for that normalization: the FixedTree-partition task
/// MUST normalize the driver-discovered device-host folder segment the same way (call this, or
/// apply the identical trim + lower-case) before comparing the two.</para>
/// </remarks>
/// <param name="deviceConfigJson">The device's schemaless <c>DeviceConfig</c> JSON blob.</param>
/// <returns>The normalized device host, or <c>null</c> when absent/blank/unparseable.</returns>
public static string? TryExtractDeviceHost(string? deviceConfigJson)
{
    if (string.IsNullOrWhiteSpace(deviceConfigJson))
    {
        return null;
    }

    try
    {
        using var parsed = JsonDocument.Parse(deviceConfigJson);
        var root = parsed.RootElement;

        var hasStringHost =
            root.ValueKind == JsonValueKind.Object
            && root.TryGetProperty("HostAddress", out var hostAddress)
            && hostAddress.ValueKind == JsonValueKind.String;
        if (!hasStringHost)
        {
            return null;
        }

        var candidate = root.GetProperty("HostAddress").GetString();

        // Trim + lower-case via the shared helper so both seams produce the identical string.
        return string.IsNullOrWhiteSpace(candidate) ? null : NormalizeDeviceHost(candidate);
    }
    catch (JsonException)
    {
        // Malformed JSON is treated the same as "no host".
        return null;
    }
}
/// <summary>
/// Canonical form of a device host: surrounding whitespace removed, then lower-cased with the
/// invariant culture. This is the SINGLE SOURCE OF TRUTH for device-host normalization.
/// </summary>
/// <remarks>
/// <see cref="TryExtractDeviceHost"/> runs a <c>Device</c>'s parsed <c>HostAddress</c> through this
/// function, and the FixedTree-partition path (<c>DriverHostActor</c>) runs a driver-discovered
/// device-host folder segment through the SAME function before comparing the two. That is what
/// makes an <see cref="EquipmentNode.DeviceHost"/> and a captured folder segment for the same
/// device compare equal regardless of case/whitespace. Idempotent: an already-normalized value
/// comes back unchanged.
/// </remarks>
/// <param name="host">The raw host string (non-null; a non-empty <c>HostAddress</c> or folder segment).</param>
/// <returns>The normalized host (trimmed + lower-cased).</returns>
public static string NormalizeDeviceHost(string host)
{
    var trimmed = host.Trim();
    return trimmed.ToLowerInvariant();
}
/// <summary>Parses the optional <c>alarm</c> object from a tag's <c>TagConfig</c> JSON. Returns null
/// when absent, non-object, or non-JSON (the tag is then a plain variable). Never throws. The
/// artifact-decode side (<c>DeploymentArtifact.ExtractTagAlarm</c>) MUST parse identically (byte-parity).</summary>
@@ -0,0 +1,8 @@
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
/// <summary>A folder to ensure during discovered-node injection (NodeId + parent + display).</summary>
/// <param name="NodeId">Node id of the folder. The applier orders folders by the number of
/// <c>'/'</c> characters in this id to ensure parents before children.</param>
/// <param name="ParentNodeId">Node id of the parent the folder is ensured under.
/// NOTE(review): the meaning of <c>null</c> here is not shown in this file — confirm with the sink.</param>
/// <param name="DisplayName">Display name given to the folder.</param>
public sealed record DiscoveredFolder(string NodeId, string? ParentNodeId, string DisplayName);
/// <summary>A read-or-write variable to ensure during discovered-node injection.</summary>
/// <param name="NodeId">Node id of the variable.</param>
/// <param name="ParentNodeId">Node id of the parent the variable is ensured under.</param>
/// <param name="DisplayName">Display name given to the variable.</param>
/// <param name="DataType">Data type name, passed through to the sink as a string.</param>
/// <param name="Writable">Whether the variable is requested as writable; the applier additionally
/// forces array variables read-only.</param>
/// <param name="IsArray">Whether the variable is an array.</param>
/// <param name="ArrayLength">Array length passed through to the sink.
/// NOTE(review): presumably <c>null</c> for scalars — confirm.</param>
public sealed record DiscoveredVariable(
string NodeId, string ParentNodeId, string DisplayName, string DataType, bool Writable, bool IsArray, uint? ArrayLength);
@@ -1567,6 +1567,100 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
}
}
/// <summary>
/// Announces, via a Part 3 <c>GeneralModelChangeEvent</c> with verb <c>NodeAdded</c>, that one or
/// more nodes were added UNDER <paramref name="affectedNodeId"/> at runtime, so already-connected,
/// model-aware OPC UA clients re-browse that node and find the new children.
/// </summary>
/// <remarks>
/// <para>This is the runtime-add counterpart of the shape-changed reporter
/// (<see cref="ReportNodeShapeChangedEvent"/>). When a driver discovers FixedTree nodes AFTER the
/// server is up and they are materialised into the served Equipment address space (Tasks 4/5), an
/// attribute notification alone is invisible to a subscribed client — only a model-change event
/// tells it that the address space grew.</para>
/// <para>The event is built while holding <c>Lock</c> and reported only after the lock has been
/// released, mirroring <see cref="ReportNodeShapeChangedEvent"/> /
/// <see cref="RevertOptimisticWriteIfNeeded"/>: <c>Server.ReportEvent</c> re-enters the server's
/// own subscription/event path, so holding the node <c>Lock</c> across it risks a lock-order
/// inversion with a client that has event subscriptions. The report is guarded by try/catch so it
/// tolerates eventing being disabled, there being no monitored items, or the server shutting down —
/// the same swallow-and-log tolerance as the write-revert path (<see cref="ReportAuditEvent"/>).
/// The nodes are already materialised by the time this runs, so a surprise from the event path
/// MUST NOT propagate out of this announcement.</para>
/// </remarks>
/// <param name="affectedNodeId">The folder-scoped node id of the parent under which nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId)
{
    ArgumentException.ThrowIfNullOrEmpty(affectedNodeId);

    // Step 1 — build the event while holding the node lock.
    GeneralModelChangeEventState modelChange;
    lock (Lock)
    {
        modelChange = BuildNodesAddedModelChange(affectedNodeId);
    }

    // Step 2 — report with the lock released. Server.ReportEvent re-enters the server's own
    // subscription/event path, and holding Lock across it risks a lock-order inversion (the same
    // arrangement ReportNodeShapeChangedEvent uses).
    try
    {
        Server.ReportEvent(SystemContext, modelChange);
    }
    catch (Exception reportFailure)
    {
        // With model-change reporting disabled, no monitored items, or the server shutting down,
        // ReportEvent may no-op or throw. The node add stands either way, so the failure goes to
        // the SDK trace and is not rethrown.
#pragma warning disable CS0618 // Utils.LogError is [Obsolete] in favour of an ITelemetryContext this manager doesn't carry.
        Utils.LogError(reportFailure, "OtOpcUaNodeManager: failed to report GeneralModelChangeEvent(NodeAdded) for {0}", affectedNodeId);
#pragma warning restore CS0618
    }
}
/// <summary>
/// Builds — without reporting — the Part 3 <c>GeneralModelChangeEvent</c> that announces nodes
/// were added under <paramref name="affectedNodeId"/>.
/// </summary>
/// <remarks>
/// <para>MIRRORS <see cref="BuildNodeShapeChangedEvent"/> exactly, apart from two fields:
/// <c>Verb</c> is <c>NodeAdded</c> (rather than <c>DataTypeChanged</c>) and <c>Affected</c> is the
/// parent node id passed in (rather than the variable's own NodeId). <c>AffectedType</c> carries
/// the affected node's TypeDefinition resolved from the live node maps (the same semantics the
/// shape-changed builder gets from the variable), falling back to <see cref="NodeId.Null"/> when
/// the id is not (yet) materialised.</para>
/// <para><c>internal</c> rather than private so a node-manager test can assert the populated
/// Changes structure at the nearest deterministic seam; observing the end-to-end
/// <c>Server.ReportEvent</c> dispatch would need a subscribed event monitored-item.</para>
/// </remarks>
/// <param name="affectedNodeId">The folder-scoped node id of the parent under which nodes were added.</param>
/// <returns>A populated, unreported <see cref="GeneralModelChangeEventState"/>.</returns>
internal GeneralModelChangeEventState BuildNodesAddedModelChange(string affectedNodeId)
{
    var parentId = new NodeId(affectedNodeId, NamespaceIndex);

    var modelChange = new GeneralModelChangeEventState(null);
    modelChange.Initialize(
        SystemContext,
        source: null,
        severity: EventSeverity.Medium,
        message: new LocalizedText($"Nodes added under {parentId}"));

    // Part 3 §8.7.4: the Server object is the emitter of a GeneralModelChangeEvent. Initialize was
    // given source:null (this manager has no Server NodeState handle), so SourceNode/SourceName are
    // stamped explicitly; conformant clients that filter events by SourceNode then still match.
    modelChange.SetChildValue(SystemContext, BrowseNames.SourceNode, ObjectIds.Server, false);
    modelChange.SetChildValue(SystemContext, BrowseNames.SourceName, "Server", false);

    var nodeAdded = new ModelChangeStructureDataType
    {
        Affected = parentId,
        // The affected node is the parent that received the children, so its TypeDefinition is
        // carried (a Folder for an equipment parent), just as the shape-changed builder carries the
        // variable's. An unknown id yields Null — a valid Part 3 "type not applicable" — and
        // clients re-browse Affected regardless.
        AffectedType = ResolveAffectedTypeDefinition(affectedNodeId),
        Verb = (byte)ModelChangeStructureVerbMask.NodeAdded,
    };

    // SetChildValue creates the Changes property on demand and sets it — the same pattern the
    // audit-event builder depends on for its child PropertyStates.
    modelChange.SetChildValue(SystemContext, BrowseNames.Changes, new[] { nodeAdded }, false);

    return modelChange;
}
/// <summary>
/// Looks up the TypeDefinition of a materialised node for a model-change event's
/// <c>AffectedType</c>. The live folder map is consulted first, then the variable map.
/// </summary>
/// <param name="nodeId">The folder-scoped node id whose TypeDefinition is wanted.</param>
/// <returns>The node's TypeDefinition id, or <see cref="NodeId.Null"/> when the id is in neither map.</returns>
private NodeId ResolveAffectedTypeDefinition(string nodeId) =>
    _folders.TryGetValue(nodeId, out var knownFolder)
        ? knownFolder.TypeDefinitionId
        : _variables.TryGetValue(nodeId, out var knownVariable)
            ? knownVariable.TypeDefinitionId
            : NodeId.Null;
/// <summary>Map a Tag.DataType string ("Boolean", "Int32", "Float", "Double", "String",
/// "DateTime") to the OPC UA built-in NodeId. Unknown names fall back to BaseDataType
/// (matches CreateVariable's default for lazy-created nodes).</summary>
@@ -88,4 +88,8 @@ public sealed class SdkAddressSpaceSink : IOpcUaAddressSpaceSink, ISurgicalAddre
/// <summary>Rebuilds the whole OPC UA address space by delegating to the node manager.</summary>
public void RebuildAddressSpace()
{
    _nodeManager.RebuildAddressSpace();
}
/// <summary>
/// Forwards a runtime NodeAdded model-change announcement (discovered-node injection) to the node
/// manager so that subscribed clients are told about it.
/// </summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId)
{
    _nodeManager.RaiseNodesAddedModelChange(affectedNodeId);
}
}
@@ -0,0 +1,67 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// An <see cref="IAddressSpaceBuilder"/> that RECORDS what is streamed into it rather than creating
/// OPC UA nodes. It captures an <see cref="ITagDiscovery"/> driver's discovered hierarchy so the
/// runtime can graft it under an equipment node.
/// </summary>
/// <remarks>
/// <para>Folder nesting is tracked: each child builder carries the path accumulated so far, so
/// every captured variable records its full <see cref="DiscoveredNode.FolderPathSegments"/>.</para>
/// <para>Value nodes only: <see cref="AddProperty"/> is ignored, and marking a variable as an alarm
/// condition hands back a no-op sink (discovered alarms are out of scope — alarms come via the
/// config path).</para>
/// <para>Single-threaded: a driver's <c>DiscoverAsync</c> streams on one caller, and the root and
/// all of its child builders share one <see cref="List{T}"/>. Not thread-safe by design.</para>
/// </remarks>
public sealed class CapturingAddressSpaceBuilder : IAddressSpaceBuilder
{
    // Shared by the root and every child scope; each scope appends to the same list.
    private readonly List<DiscoveredNode> _nodes;

    // Folder browse names from the root down to this scope.
    private readonly IReadOnlyList<string> _path;

    /// <summary>Creates a root builder: empty folder path, fresh node list.</summary>
    public CapturingAddressSpaceBuilder() : this(nodes: [], path: []) { }

    private CapturingAddressSpaceBuilder(List<DiscoveredNode> nodes, IReadOnlyList<string> path)
    {
        _nodes = nodes;
        _path = path;
    }

    /// <summary>Every variable captured anywhere in the tree (shared by the root and every child scope).</summary>
    public IReadOnlyList<DiscoveredNode> Nodes
    {
        get { return _nodes; }
    }

    /// <inheritdoc />
    public IAddressSpaceBuilder Folder(string browseName, string displayName)
    {
        // The child scope gets its own path (this scope's path plus the new segment) but keeps
        // appending to the shared node list.
        IReadOnlyList<string> childPath = [.. _path, browseName];
        return new CapturingAddressSpaceBuilder(_nodes, childPath);
    }

    /// <inheritdoc />
    public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
    {
        var captured = new DiscoveredNode(
            FolderPathSegments: _path,
            BrowseName: browseName,
            DisplayName: displayName,
            FullReference: attributeInfo.FullName,
            DataType: attributeInfo.DriverDataType,
            IsArray: attributeInfo.IsArray,
            ArrayDim: attributeInfo.ArrayDim,
            // Anything other than ViewOnly is recorded as writable.
            Writable: attributeInfo.SecurityClass != SecurityClassification.ViewOnly,
            IsHistorized: attributeInfo.IsHistorized);
        _nodes.Add(captured);

        return new NullHandle(attributeInfo.FullName);
    }

    /// <inheritdoc />
    public void AddProperty(string browseName, DriverDataType dataType, object? value)
    {
        // Metadata only — deliberately not captured.
    }

    /// <summary>Variable handle whose alarm marking does nothing (discovered alarms are out of scope).</summary>
    private sealed class NullHandle : IVariableHandle
    {
        private readonly string _fullReference;

        public NullHandle(string fullRef)
        {
            _fullReference = fullRef;
        }

        public string FullReference
        {
            get { return _fullReference; }
        }

        public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info)
        {
            return new NullSink();
        }
    }

    /// <summary>Alarm sink that discards every condition transition.</summary>
    private sealed class NullSink : IAlarmConditionSink
    {
        public void OnTransition(AlarmEventArgs args)
        {
        }
    }
}
@@ -201,7 +201,10 @@ public static class DeploymentArtifact
var areas = ReadArray(root, "UnsAreas", ReadAreaProjection);
var lines = ReadArray(root, "UnsLines", ReadLineProjection);
var equipment = ReadArray(root, "Equipment", ReadEquipmentNode);
// DeviceId → connection host, resolved from the artifact's Devices array via the SAME shared
// helper the composer uses, so each EquipmentNode.DeviceHost is byte-parity-equal across seams.
var deviceHostById = BuildDeviceHostMap(root);
var equipment = ReadArray(root, "Equipment", el => ReadEquipmentNode(el, deviceHostById));
var drivers = ReadArray(root, "DriverInstances", ReadDriverPlan);
var alarms = ReadArray(root, "ScriptedAlarms", ReadAlarmPlan);
var equipmentTags = BuildEquipmentTagPlans(root);
@@ -807,7 +810,29 @@ public static class DeploymentArtifact
return new UnsLineProjection(id!, areaId!, name ?? id!);
}
private static EquipmentNode? ReadEquipmentNode(JsonElement el)
/// <summary>Builds the <c>DeviceId</c> → connection-host lookup from the artifact's <c>Devices</c> array.
/// Each usable row contributes its <c>DeviceId</c> and the host that the shared
/// <see cref="AddressSpaceComposer.TryExtractDeviceHost"/> helper resolves from the row's
/// <c>DeviceConfig</c> string, so the decode side goes through the same helper as the composer.
/// Keys compare ordinally; a repeated DeviceId overwrites the earlier entry (last wins). Rows that are
/// not JSON objects, or whose DeviceId is missing/blank, are skipped. When <c>Devices</c> is absent or
/// not an array the result is an empty map.</summary>
/// <param name="root">The artifact root element.</param>
/// <returns>The DeviceId → host map; a value is null when the helper resolved no host for that device.</returns>
private static IReadOnlyDictionary<string, string?> BuildDeviceHostMap(JsonElement root)
{
    var hostByDeviceId = new Dictionary<string, string?>(StringComparer.Ordinal);

    var hasDeviceArray = root.TryGetProperty("Devices", out var devices)
        && devices.ValueKind == JsonValueKind.Array;
    if (hasDeviceArray)
    {
        foreach (var device in devices.EnumerateArray())
        {
            if (device.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var deviceId = ReadString(device, "DeviceId");
            if (string.IsNullOrWhiteSpace(deviceId))
            {
                continue;
            }

            var deviceConfig = ReadString(device, "DeviceConfig");
            hostByDeviceId[deviceId!] = AddressSpaceComposer.TryExtractDeviceHost(deviceConfig);
        }
    }

    return hostByDeviceId;
}
private static EquipmentNode? ReadEquipmentNode(JsonElement el, IReadOnlyDictionary<string, string?> deviceHostById)
{
var id = ReadString(el, "EquipmentId");
// DisplayName = the UNS level-5 Name segment (friendly browse name, matching UnsArea/UnsLine
@@ -816,7 +841,19 @@ public static class DeploymentArtifact
var displayName = ReadString(el, "Name");
var lineId = ReadString(el, "UnsLineId");
if (string.IsNullOrWhiteSpace(id)) return null;
return new EquipmentNode(id!, displayName ?? id!, lineId ?? string.Empty);
// DriverInstanceId / DeviceId copied straight from the row (null when absent / JSON null);
// DeviceHost resolved from the device-host map by DeviceId — byte-parity with the composer's
// `e.DeviceId is null ? null : deviceHostById.GetValueOrDefault(e.DeviceId)`.
var driverInstanceId = ReadString(el, "DriverInstanceId");
var deviceId = ReadString(el, "DeviceId");
var deviceHost = deviceId is null ? null : deviceHostById.GetValueOrDefault(deviceId);
return new EquipmentNode(
id!,
displayName ?? id!,
lineId ?? string.Empty,
DriverInstanceId: driverInstanceId,
DeviceId: deviceId,
DeviceHost: deviceHost);
}
private static DriverInstancePlan? ReadDriverPlan(JsonElement el)
@@ -0,0 +1,19 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// A flattened variable captured from a driver's <see cref="ITagDiscovery.DiscoverAsync"/> stream
/// by <see cref="CapturingAddressSpaceBuilder"/>. Folder nesting is preserved in
/// <see cref="FolderPathSegments"/> so the injector can re-root the node under an equipment.
/// </summary>
/// <param name="FolderPathSegments">
/// Browse names of the folders enclosing the variable, outermost first; empty when the variable was
/// added directly on the root builder.
/// </param>
/// <param name="BrowseName">The browse name passed to the builder's <c>Variable</c> call.</param>
/// <param name="DisplayName">The display name passed to the builder's <c>Variable</c> call.</param>
/// <param name="FullReference">
/// The driver attribute's <c>FullName</c>. <see cref="DiscoveredNodeMapper"/> uses it both to drop nodes
/// that are already authored and as the key of the live-value routing map.
/// </param>
/// <param name="DataType">The driver attribute's <c>DriverDataType</c>.</param>
/// <param name="IsArray">Copied from the driver attribute's <c>IsArray</c>.</param>
/// <param name="ArrayDim">Copied from the driver attribute's <c>ArrayDim</c> (may be null).</param>
/// <param name="Writable">
/// True unless the driver attribute's security class is <c>SecurityClassification.ViewOnly</c>.
/// </param>
/// <param name="IsHistorized">Copied from the driver attribute's <c>IsHistorized</c>.</param>
public sealed record DiscoveredNode(
IReadOnlyList<string> FolderPathSegments,
string BrowseName,
string DisplayName,
string FullReference,
DriverDataType DataType,
bool IsArray,
uint? ArrayDim,
bool Writable,
bool IsHistorized);
@@ -0,0 +1,113 @@
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// The mapped result of grafting discovered nodes under an equipment node, as produced by
/// <see cref="DiscoveredNodeMapper.Map"/>.
/// </summary>
/// <param name="Folders">
/// Folders to ensure, in insertion order (parent-before-child within each node's prefix chain) — NOT
/// globally depth-sorted. The applier sorts by depth before ensuring, so consumers must not assume a
/// global parent-before-child ordering across the whole list.
/// </param>
/// <param name="Variables">Variables to ensure under the (post-collapse) folders.</param>
/// <param name="RoutingByRef">
/// Driver FullReference -> equipment NodeId, for live-value routing. One entry per distinct
/// FullReference: if the same FullReference is mapped more than once, the last mapping wins.
/// </param>
public sealed record DiscoveredInjectionPlan(
IReadOnlyList<DiscoveredFolder> Folders,
IReadOnlyList<DiscoveredVariable> Variables,
IReadOnlyDictionary<string, string> RoutingByRef); // driver FullReference -> equipment NodeId
/// <summary>
/// Pure mapper: re-roots a driver's captured discovery tree under an equipment node, deduping
/// authored Config-DB refs and collapsing the single device-host folder. See the design doc
/// 2026-06-26-otopcua-fixedtree-equipment-injection-design.md.
/// </summary>
public static class DiscoveredNodeMapper
{
    /// <summary>
    /// Maps captured <paramref name="nodes"/> into folders + variables (NodeIds scoped under
    /// <paramref name="equipmentId"/>) plus a driver-FullReference → equipment-NodeId routing map.
    /// </summary>
    /// <param name="equipmentId">The owning equipment's NodeId (root of the grafted subtree).</param>
    /// <param name="nodes">The captured discovery tree (from <c>CapturingAddressSpaceBuilder</c>).</param>
    /// <param name="authoredRefs">
    /// Driver FullReferences already authored as Config-DB equipment tags for this driver —
    /// skipped so a discovered node never shadows an authored one.
    /// </param>
    /// <returns>
    /// The folders, variables, and routing map to apply against the OPC UA address space. Folders are
    /// listed once each, in the order they were first encountered (so every folder follows its own
    /// ancestors); variables are listed in the order of the kept input nodes.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A kept node carries a <see cref="DriverDataType"/> that has no built-in type mapping.
    /// </exception>
    public static DiscoveredInjectionPlan Map(
        string equipmentId, IReadOnlyList<DiscoveredNode> nodes, IReadOnlySet<string> authoredRefs)
    {
        var kept = nodes.Where(n => !authoredRefs.Contains(n.FullReference)).ToList();

        // Device-folder collapse: when every kept node shares one identical index-1 segment (the single
        // device-host folder under the driver root, e.g. "10.0.0.5:8193"), drop it so the path reads
        // FOCAS/Identity/... rather than FOCAS/10.0.0.5:8193/Identity/.... With >=2 distinct devices the
        // level is retained so identical leaf names across devices don't collide (degrades gracefully).
        var collapseIndex1 = kept.Count > 0
            && kept.All(n => n.FolderPathSegments.Count >= 2)
            && kept.Select(n => n.FolderPathSegments[1]).Distinct(StringComparer.Ordinal).Count() == 1;

        // Folder order is carried by an explicit list and dedupe by a set: Dictionary enumeration order
        // is documented as undefined, so it must not be what defines the "insertion order" of the result.
        var folders = new List<DiscoveredFolder>();
        var seenFolderIds = new HashSet<string>(StringComparer.Ordinal);
        var variables = new List<DiscoveredVariable>(kept.Count);
        var routing = new Dictionary<string, string>(StringComparer.Ordinal);

        foreach (var n in kept)
        {
            IReadOnlyList<string> segs = collapseIndex1
                ? [n.FolderPathSegments[0], .. n.FolderPathSegments.Skip(2)]
                : n.FolderPathSegments;

            // Ensure every prefix folder once, each parented at its own prefix (the first segment's
            // parent is the equipment itself). The path and parent id are carried forward from the
            // previous level instead of being re-joined / re-derived for every prefix.
            var folderPath = string.Empty;
            var parentNodeId = equipmentId;
            for (var i = 0; i < segs.Count; i++)
            {
                folderPath = i == 0 ? segs[0] : $"{folderPath}/{segs[i]}";
                var folderNodeId = EquipmentNodeIds.SubFolder(equipmentId, folderPath);
                if (seenFolderIds.Add(folderNodeId))
                    folders.Add(new DiscoveredFolder(folderNodeId, parentNodeId, segs[i]));
                parentNodeId = folderNodeId;
            }

            // After the loop folderPath is the variable's full folder path ("" when it has no folders).
            var varNodeId = EquipmentNodeIds.Variable(equipmentId, folderPath, n.BrowseName);

            // Mirror AddressSpaceApplier.MaterialiseEquipmentTags: a folder-less variable parents directly
            // at the equipment (SubFolder("", ...) would yield a trailing-slash "EQ-1/" that mismatches the
            // EquipmentNodeIds.Variable NodeId, which guards IsNullOrWhiteSpace). Otherwise the parent is
            // the deepest folder, whose NodeId parentNodeId already holds.
            var varParent = string.IsNullOrEmpty(folderPath) ? equipmentId : parentNodeId;

            variables.Add(new DiscoveredVariable(
                varNodeId, varParent, n.DisplayName, ToBuiltinTypeString(n.DataType), n.Writable, n.IsArray, n.ArrayDim));

            // Last mapping wins if the same FullReference shows up more than once.
            routing[n.FullReference] = varNodeId;
        }

        return new DiscoveredInjectionPlan(folders, variables, routing);
    }

    /// <summary>
    /// Maps a <see cref="DriverDataType"/> to the OPC-UA-built-in type STRING that
    /// <c>OtOpcUaNodeManager.EnsureVariable</c>'s <c>ResolveBuiltInDataType</c> accepts — so a
    /// discovered variable resolves to the same built-in type as an authored equipment tag. Most
    /// enum names pass through verbatim; <see cref="DriverDataType.Float32"/>/<see cref="DriverDataType.Float64"/>
    /// map to the SDK's "Float"/"Double" names, and <see cref="DriverDataType.Reference"/> (a Galaxy
    /// attribute reference) is carried as an OPC UA String per the enum's own contract.
    /// </summary>
    /// <param name="dt">The driver-side data type to translate.</param>
    /// <returns>The built-in type name for <paramref name="dt"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="dt"/> has no mapping here.</exception>
    private static string ToBuiltinTypeString(DriverDataType dt) => dt switch
    {
        DriverDataType.Boolean => "Boolean",
        DriverDataType.Int16 => "Int16",
        DriverDataType.Int32 => "Int32",
        DriverDataType.Int64 => "Int64",
        DriverDataType.UInt16 => "UInt16",
        DriverDataType.UInt32 => "UInt32",
        DriverDataType.UInt64 => "UInt64",
        DriverDataType.Float32 => "Float",
        DriverDataType.Float64 => "Double",
        DriverDataType.String => "String",
        DriverDataType.DateTime => "DateTime",
        DriverDataType.Reference => "String",
        _ => throw new ArgumentOutOfRangeException(nameof(dt), dt, "Unmapped DriverDataType."),
    };
}
@@ -153,6 +153,37 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
/// value maps so stale condition state never leaks across redeploys.</summary>
private readonly NativeAlarmProjector _nativeAlarmProjector = new();
/// <summary>The composition from the most-recent apply (set at the END of
/// <see cref="PushDesiredSubscriptions"/>). Discovered-node injection
/// (<see cref="HandleDiscoveredNodes"/>) reads it to resolve the equipment bound to a driver (from the
/// composition's <c>EquipmentNodes</c> whose <c>DriverInstanceId</c> matches, UNION the authored
/// <c>EquipmentTags</c> for that driver — so a driver with zero authored tags can still graft onto an
/// equipment bound via <c>EquipmentNode.DriverInstanceId</c>) and to recompute the authored value + alarm
/// subscription sets when merging FixedTree refs. Null until the first apply — a
/// <see cref="DriverInstanceActor.DiscoveredNodesReady"/> arriving before any apply is ignored.</summary>
private AddressSpaceComposition? _lastComposition;
/// <summary>The most-recent discovered-injection plan(s) per driver instance, cached so the redeploy
/// re-inject tail can re-apply the live graft after an address-space rebuild without re-running discovery.
/// Keyed by DriverInstanceId at the OUTER level, then by EquipmentId at the INNER level (driver → (equipment
/// → plan)). A driver that resolves to a single equipment has exactly one inner entry; a multi-device
/// driver has one inner entry per device-host partition that matched an equipment and yielded at least one
/// new variable (see <see cref="PartitionDiscoveredByDeviceHost"/>). Inner dict is mutable
/// (the redeploy tail drops stale per-equipment entries in place); both levels are Ordinal-keyed.
/// Last-writer-wins on a re-discovery (the whole inner map is replaced).</summary>
private readonly Dictionary<string, Dictionary<string, DiscoveredInjectionPlan>> _discoveredByDriver =
new(StringComparer.Ordinal);
/// <summary>Per-driver signature of the last-logged device-host PARTITION diagnostic (unmatched / ambiguous
/// / degenerate host), folded with the current revision, so the ~15 repeated re-discovery passes within a
/// connect don't re-warn an unchanged condition: it is WARNED once when it first appears (or changes), and
/// DEBUG-logged on the identical repeat passes. Folding in <see cref="_currentRevision"/> makes a redeploy
/// re-warn once. Best-effort LOG-LEVEL dedup ONLY — never affects grafting; the matched-plan re-apply is
/// separately short-circuited by <see cref="PlansRoutingEqual"/>. Cleared for a driver whose partition comes
/// back clean so a later recurrence re-warns; bounded by driver count (a few). Only touched on the
/// multi-candidate path (<see cref="PartitionDiscoveredByDeviceHost"/>).</summary>
private readonly Dictionary<string, string> _lastPartitionWarnSignature = new(StringComparer.Ordinal);
/// <summary>
/// Cached local <see cref="RedundancyRole"/> from the latest <see cref="RedundancyStateChanged"/>
/// snapshot (null = unknown until the first snapshot arrives, or no local node match). The inbound
@@ -483,6 +514,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<DriverInstanceActor.AttributeValuePublished>(ForwardToMux);
Receive<DriverInstanceActor.AttributeAlarmPublished>(ForwardNativeAlarm);
Receive<DriverInstanceActor.DiscoveredNodesReady>(HandleDiscoveredNodes);
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<RouteNodeWrite>(HandleRouteNodeWrite);
@@ -511,6 +543,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
Receive<GetDiagnostics>(HandleGetDiagnostics);
Receive<DriverInstanceActor.AttributeValuePublished>(ForwardToMux);
Receive<DriverInstanceActor.AttributeAlarmPublished>(ForwardNativeAlarm);
Receive<DriverInstanceActor.DiscoveredNodesReady>(HandleDiscoveredNodes);
Receive<RestartDriver>(HandleRestartDriver);
Receive<ReconnectDriver>(HandleReconnectDriver);
Receive<RouteNodeWrite>(HandleRouteNodeWrite);
@@ -552,6 +585,331 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
}
}
/// <summary>
/// Reacts to a driver child's post-connect <see cref="DriverInstanceActor.DiscoveredNodesReady"/>.
/// Steps: (1) bail out if no composition has been applied yet; (2) resolve the equipment(s) the driver
/// is bound to — <c>EquipmentNodes</c> matched by <c>DriverInstanceId</c>, then the equipments of the
/// driver's authored <c>EquipmentTags</c>, de-duplicated; (3) build the per-equipment plan map with
/// <see cref="DiscoveredNodeMapper"/> (single equipment) or
/// <see cref="PartitionDiscoveredByDeviceHost"/> (several), dropping nodes that shadow an authored ref;
/// (4) skip the apply when the routing is identical to the cached pass; otherwise (5) cache the map and
/// graft it via <see cref="ApplyDiscoveredPlansForDriver"/>.
/// </summary>
/// <param name="msg">The driver's discovery result (driver id + captured nodes).</param>
private void HandleDiscoveredNodes(DriverInstanceActor.DiscoveredNodesReady msg)
{
    var composition = _lastComposition;
    if (composition is null)
    {
        _log.Debug("DriverHost {Node}: DiscoveredNodesReady from {Driver} before any composition applied — ignored",
            _localNode, msg.DriverInstanceId);
        return;
    }

    var driverId = msg.DriverInstanceId;

    // The driver's authored tags, filtered once: they feed both the equipment resolution and the
    // authored-ref set below.
    var driverTags = composition.EquipmentTags
        .Where(t => string.Equals(t.DriverInstanceId, driverId, StringComparison.Ordinal))
        .ToList();

    // Equipment bound through EquipmentNode.DriverInstanceId comes first (this is what lets a driver
    // with ZERO authored tags graft onto a tag-less equipment), then the equipments of the authored
    // tags. Distinct, so an equipment reachable both ways is resolved once.
    var equipmentIds = composition.EquipmentNodes
        .Where(e => e.DriverInstanceId is not null && string.Equals(e.DriverInstanceId, driverId, StringComparison.Ordinal))
        .Select(e => e.EquipmentId)
        .Concat(driverTags.Select(t => t.EquipmentId))
        .Distinct(StringComparer.Ordinal)
        .ToList();

    if (equipmentIds.Count == 0)
    {
        _log.Info("DriverHost {Node}: no equipment for driver {Driver} — skipping discovered-node injection",
            _localNode, msg.DriverInstanceId);
        return;
    }

    // DRIVER-WIDE authored refs (value + alarm tags alike). The mapper drops any captured node whose
    // FullReference is in this set, so discovery never shadows an authored tag. An empty set (tag-less
    // equipment) simply keeps everything. The same driver-wide set is handed to every partition on the
    // multi-device path.
    var authoredRefs = driverTags
        .Select(t => t.FullName)
        .ToHashSet(StringComparer.Ordinal);

    Dictionary<string, DiscoveredInjectionPlan> newPlans;
    if (equipmentIds.Count > 1)
    {
        // Several candidates: split the captured tree by device-host folder and graft each subset under
        // the equipment whose DeviceHost matches. Unmatched / ambiguous / degenerate cases are logged
        // inside the helper and contribute nothing.
        newPlans = PartitionDiscoveredByDeviceHost(msg, equipmentIds, authoredRefs);
        if (newPlans.Count == 0)
        {
            return; // degenerate / no host matched a graftable partition — already logged
        }
    }
    else
    {
        // Exactly one candidate: the whole captured tree goes under it (the mapper collapses the single
        // device-host folder).
        var soleEquipmentId = equipmentIds[0];
        var solePlan = DiscoveredNodeMapper.Map(soleEquipmentId, msg.Nodes, authoredRefs);
        if (solePlan.Variables.Count == 0)
        {
            return; // nothing new to inject (all captured nodes were authored)
        }

        newPlans = new Dictionary<string, DiscoveredInjectionPlan>(StringComparer.Ordinal)
        {
            [soleEquipmentId] = solePlan,
        };
    }

    // Unchanged-plan short-circuit, shared by both paths. The driver re-sends DiscoveredNodesReady on
    // each of its repeated re-discovery passes; re-applying an IDENTICAL set would push
    // SetDesiredSubscriptions again and make the child drop and re-create its subscription (authored
    // tags included). A grown or otherwise changed set compares unequal and is applied.
    var sameAsLastApply = _discoveredByDriver.TryGetValue(driverId, out var previousPlans)
        && PlansRoutingEqual(previousPlans, newPlans);
    if (sameAsLastApply)
    {
        var variableCount = 0;
        foreach (var plan in newPlans.Values)
        {
            variableCount += plan.Variables.Count;
        }

        _log.Debug("DriverHost {Node}: discovered set for driver {Driver} unchanged ({Count} node(s) across {Equipment} equipment(s)) — re-apply skipped",
            _localNode, msg.DriverInstanceId, variableCount, newPlans.Count);
        return;
    }

    _discoveredByDriver[driverId] = newPlans;
    ApplyDiscoveredPlansForDriver(driverId, newPlans);
}
/// <summary>
/// Partitions a multi-device driver's captured FixedTree by its (normalized) device-host folder segment
/// (<c>FolderPathSegments[1]</c>) and maps each device's subset under the candidate equipment whose
/// <see cref="EquipmentNode.DeviceHost"/> matches — the follow-up E part-2 multi-device graft. Returns
/// the per-equipment plan map (one entry per device that matched AND had at least one new variable);
/// EMPTY when nothing is graftable.
/// <list type="bullet">
///   <item>Builds <c>normalizedHost → equipmentId</c> from the candidate <see cref="EquipmentNode"/>s
///   that carry a non-null DeviceHost. Two distinct candidates sharing a host is AMBIGUOUS — that host
///   is un-mapped (its nodes are warn-skipped) rather than grafted onto an arbitrary equipment.</item>
///   <item><b>I1 divergence:</b> a candidate WITHOUT a DeviceHost (e.g. resolved via authored tags only,
///   no device binding) simply gets no partition — the FixedTree is the device's structure, so it
///   belongs under the device-bound equipment. No crash; that candidate is just not a partition target.</item>
///   <item>If NO candidate has a DeviceHost at all there is nothing to partition on ⇒ DEGENERATE ⇒
///   warn-skip the whole driver (returns empty).</item>
///   <item>A discovered partition whose host is unmatched (or whose node has &lt;2 folder segments, so no
///   host folder) is warn-skipped — its nodes are NOT mis-grafted; the matched partitions still graft.</item>
///   <item>In the diagnostic, a host is listed under exactly one heading: <c>ambiguous</c> when two
///   candidates share it, <c>unmatched</c> when no candidate carries it. An ambiguous host is never
///   also listed as unmatched.</item>
/// </list>
/// The device-host folder segment AND the stored DeviceHost are both run through the SAME
/// <see cref="AddressSpaceComposer.NormalizeDeviceHost"/> (single source of truth), so they compare equal
/// regardless of case/whitespace.
/// <para><b>Warn-spam taming.</b> The unmatched/ambiguous/degenerate condition is warned ONCE then
/// Debug-logged on the repeated re-discovery passes (see <see cref="ShouldWarnPartition"/>).</para>
/// <para><b>Mid-connect partition shrink (M2).</b> If a later pass yields FEWER device partitions than a
/// prior pass within the same connect, the dropped partition's routes + materialised nodes are NOT
/// actively pruned until the next full redeploy (<see cref="PushDesiredSubscriptions"/> Clears + rebuilds
/// the maps). This matches the existing "FixedTree grows-then-stabilises within a connect" assumption —
/// no mid-connect pruning is built here (out of scope).</para>
/// </summary>
/// <param name="msg">The driver's discovery result (driver id + captured nodes).</param>
/// <param name="equipmentIds">The candidate equipment ids resolved for the driver (more than one).</param>
/// <param name="authoredRefs">Driver-wide authored FullReferences; matching discovered nodes are dropped by the mapper.</param>
/// <returns>EquipmentId → plan for every partition that matched and produced at least one variable.</returns>
private Dictionary<string, DiscoveredInjectionPlan> PartitionDiscoveredByDeviceHost(
    DriverInstanceActor.DiscoveredNodesReady msg,
    IReadOnlyList<string> equipmentIds,
    IReadOnlySet<string> authoredRefs)
{
    var driverId = msg.DriverInstanceId;
    var candidateSet = equipmentIds.ToHashSet(StringComparer.Ordinal);

    // normalizedHost → equipmentId, from candidate EquipmentNodes that carry a DeviceHost. A host shared by
    // two DISTINCT candidates is ambiguous: un-map it (warn-skip) so its nodes aren't grafted arbitrarily.
    var hostToEquipment = new Dictionary<string, string>(StringComparer.Ordinal);
    var ambiguousHosts = new HashSet<string>(StringComparer.Ordinal);
    foreach (var node in _lastComposition!.EquipmentNodes)
    {
        if (!candidateSet.Contains(node.EquipmentId) || node.DeviceHost is null) continue;

        // DeviceHost is already normalized at compose/decode time; re-normalize through the shared helper so
        // the comparison is the single source of truth (idempotent — harmless if it was already normalized).
        var host = AddressSpaceComposer.NormalizeDeviceHost(node.DeviceHost);
        if (ambiguousHosts.Contains(host)) continue;

        if (hostToEquipment.TryGetValue(host, out var existing))
        {
            if (!string.Equals(existing, node.EquipmentId, StringComparison.Ordinal))
            {
                hostToEquipment.Remove(host);
                ambiguousHosts.Add(host);
            }
            continue;
        }

        hostToEquipment[host] = node.EquipmentId;
    }

    // DEGENERATE: >1 candidate but none resolved a DeviceHost ⇒ nothing to partition on ⇒ warn-skip the
    // whole driver. (Falls through the same warn-once dedup as the unmatched case.)
    if (hostToEquipment.Count == 0 && ambiguousHosts.Count == 0)
    {
        if (ShouldWarnPartition(driverId, "degenerate"))
            _log.Warning("DriverHost {Node}: driver {Driver} maps to {Count} equipments but none has a DeviceHost — discovered-node injection skipped (no device-host to partition on)",
                _localNode, driverId, equipmentIds.Count);
        else
            _log.Debug("DriverHost {Node}: driver {Driver} still has no DeviceHost on any of {Count} equipments — skipped (repeat)",
                _localNode, driverId, equipmentIds.Count);
        return new Dictionary<string, DiscoveredInjectionPlan>(StringComparer.Ordinal);
    }

    // Partition the captured tree by its device-host folder segment (FolderPathSegments[1]). Keep only
    // nodes whose host maps to a candidate; everything else is recorded for the diagnostic below.
    var matchedNodes = new Dictionary<string, List<DiscoveredNode>>(StringComparer.Ordinal);
    var unmatchedHosts = new HashSet<string>(StringComparer.Ordinal);
    foreach (var n in msg.Nodes)
    {
        // A node with <2 segments has no host folder at all.
        if (n.FolderPathSegments.Count < 2)
        {
            unmatchedHosts.Add("(no-device-host-folder)");
            continue;
        }

        var key = AddressSpaceComposer.NormalizeDeviceHost(n.FolderPathSegments[1]);
        if (hostToEquipment.ContainsKey(key))
        {
            if (!matchedNodes.TryGetValue(key, out var list))
                matchedNodes[key] = list = new List<DiscoveredNode>();
            list.Add(n);
            continue;
        }

        // An ambiguous host was removed from hostToEquipment above, so it lands here too. It is already
        // reported under ambiguous=[...]; listing it as unmatched as well would wrongly suggest that no
        // equipment carries that host. Either way its nodes stay skipped.
        if (!ambiguousHosts.Contains(key))
            unmatchedHosts.Add(key);
    }

    // Map each matched device's subset under its equipment. ONE device per partition ⇒ the mapper collapses
    // that partition's single host folder ⇒ clean EQ-n/FOCAS/...; a plan with zero new variables (all
    // shadowed by authored refs) contributes no entry.
    // NOTE: DiscoveredNodeMapper.Map's collapse predicate compares the host segment with RAW
    // StringComparer.Ordinal, whereas we grouped on the NORMALIZED host. Harmless: a real FOCAS device
    // emits one consistent HostAddress string per device, so a partition is single-host either way (collapse
    // fires). Even if two raw spellings of the same host slipped into one partition, the only effect would be
    // a retained (non-collapsed) host folder — never a mis-graft or NodeId collision (the equipment scope
    // already isolates them).
    var plans = new Dictionary<string, DiscoveredInjectionPlan>(StringComparer.Ordinal);
    foreach (var (host, nodes) in matchedNodes)
    {
        var equipmentId = hostToEquipment[host];
        var plan = DiscoveredNodeMapper.Map(equipmentId, nodes, authoredRefs);
        if (plan.Variables.Count > 0) plans[equipmentId] = plan;
    }

    // Surface unmatched/ambiguous hosts ONCE (then Debug on the repeated passes). The matched partitions
    // above still graft regardless. When the partition came back fully clean, drop the driver's signature so
    // a later recurrence re-warns.
    if (unmatchedHosts.Count > 0 || ambiguousHosts.Count > 0)
    {
        // Sorted so the dedup signature is stable across passes regardless of discovery order.
        var unmatched = string.Join(",", unmatchedHosts.OrderBy(h => h, StringComparer.Ordinal));
        var ambiguous = string.Join(",", ambiguousHosts.OrderBy(h => h, StringComparer.Ordinal));
        if (ShouldWarnPartition(driverId, "u:" + unmatched + "|a:" + ambiguous))
            _log.Warning("DriverHost {Node}: driver {Driver}: discovered device-host partition(s) skipped — unmatched=[{Unmatched}] ambiguous=[{Ambiguous}]; matched partitions still grafted",
                _localNode, driverId, unmatched, ambiguous);
        else
            _log.Debug("DriverHost {Node}: driver {Driver}: device-host partition(s) still skipped — unmatched=[{Unmatched}] ambiguous=[{Ambiguous}] (repeat)",
                _localNode, driverId, unmatched, ambiguous);
    }
    else
    {
        _lastPartitionWarnSignature.Remove(driverId);
    }

    return plans;
}
/// <summary>Decides the LOG LEVEL of a device-host partition diagnostic. The condition key is combined
/// with the current revision ("none" when there is no revision) into a signature; the method returns
/// true (⇒ WARN) when that signature differs from the one last recorded for the driver, or when none is
/// recorded, and false (⇒ DEBUG) when it is an identical repeat. The new signature is always stored as a
/// side effect. Because the revision is part of the signature, a redeploy makes the same condition warn
/// once more. Only the log level depends on this — grafting never does.</summary>
/// <param name="driverId">The driver the diagnostic is about.</param>
/// <param name="conditionKey">A stable description of the condition being reported.</param>
/// <returns>True to log at Warning, false to log at Debug.</returns>
private bool ShouldWarnPartition(string driverId, string conditionKey)
{
    var revisionPart = _currentRevision?.ToString() ?? "none";
    var signature = string.Concat(revisionPart, "|", conditionKey);

    var alreadyLogged = _lastPartitionWarnSignature.TryGetValue(driverId, out var previous)
        && string.Equals(previous, signature, StringComparison.Ordinal);

    _lastPartitionWarnSignature[driverId] = signature;
    return !alreadyLogged;
}
/// <summary>Compares two routing maps: equal when they hold the same number of entries and every key of
/// <paramref name="a"/> is present in <paramref name="b"/> with an ordinally-equal NodeId. Used so
/// <see cref="HandleDiscoveredNodes"/> can skip re-applying a discovered set that has not changed between
/// the driver's repeated post-connect re-discovery passes (a grown or changed set compares unequal).</summary>
/// <param name="a">The first FullReference → NodeId map.</param>
/// <param name="b">The second FullReference → NodeId map.</param>
/// <returns>True when both maps contain exactly the same entries.</returns>
private static bool RoutingEquals(IReadOnlyDictionary<string, string> a, IReadOnlyDictionary<string, string> b)
{
    if (a.Count != b.Count)
    {
        return false;
    }

    foreach (var (driverRef, nodeId) in a)
    {
        var matches = b.TryGetValue(driverRef, out var otherNodeId)
            && string.Equals(otherNodeId, nodeId, StringComparison.Ordinal);
        if (!matches)
        {
            return false;
        }
    }

    return true;
}
/// <summary>Compares two per-equipment plan maps by routing only: equal when they have the same number
/// of equipments and every equipment of <paramref name="a"/> exists in <paramref name="b"/> with an
/// equal <see cref="DiscoveredInjectionPlan.RoutingByRef"/> (per <see cref="RoutingEquals"/>). This is
/// what lets <see cref="HandleDiscoveredNodes"/> short-circuit a re-discovery whose WHOLE per-driver set
/// is unchanged; a grown or changed set on any equipment compares unequal and is re-applied.</summary>
/// <param name="a">The first EquipmentId → plan map.</param>
/// <param name="b">The second EquipmentId → plan map.</param>
/// <returns>True when both maps route exactly the same references to the same NodeIds.</returns>
private static bool PlansRoutingEqual(
    IReadOnlyDictionary<string, DiscoveredInjectionPlan> a,
    IReadOnlyDictionary<string, DiscoveredInjectionPlan> b)
{
    if (a.Count != b.Count)
    {
        return false;
    }

    foreach (var (equipmentId, plan) in a)
    {
        if (!b.TryGetValue(equipmentId, out var otherPlan))
        {
            return false;
        }

        if (!RoutingEquals(plan.RoutingByRef, otherPlan.RoutingByRef))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Grafts a driver's per-equipment <see cref="DiscoveredInjectionPlan"/> map onto the served state. It
/// works in two phases so the driver receives a single resubscribe no matter how many equipments it spans:
/// <list type="number">
///   <item><b>Per equipment</b> — add each plan's routes to the live-value routing maps (forward:
///   driver ref → set of NodeIds; reverse: NodeId → driver ref) and Tell the publish actor
///   <see cref="ZB.MOM.WW.OtOpcUa.Runtime.OpcUa.OpcUaPublishActor.MaterialiseDiscoveredNodes"/> for that
///   equipment.</item>
///   <item><b>Once per driver</b> — send the child one
///   <see cref="DriverInstanceActor.SetDesiredSubscriptions"/> whose value set is the driver's authored
///   non-alarm refs followed by the discovered refs of ALL supplied plans (de-duplicated), together with
///   the driver's distinct authored alarm refs.</item>
/// </list>
/// Kept as its own method so the redeploy re-inject tail can re-apply cached plans after an address-space
/// rebuild without re-running discovery. If the driver has no child entry, phase one still runs and the
/// method then returns without sending a subscription or writing the Info log.
/// </summary>
/// <param name="driverId">The driver instance whose plans are being grafted.</param>
/// <param name="plansByEquipment">EquipmentId → plan to graft for that equipment.</param>
private void ApplyDiscoveredPlansForDriver(
    string driverId, IReadOnlyDictionary<string, DiscoveredInjectionPlan> plansByEquipment)
{
    // Phase 1 — per equipment. Purely ADDITIVE across passes: routes for refs that a later, smaller
    // discovery no longer reports are left in place until the next full apply rebuilds the maps.
    var injectedCount = 0;
    foreach (var pair in plansByEquipment)
    {
        var plan = pair.Value;

        foreach (var route in plan.RoutingByRef)
        {
            var driverRef = route.Key;
            var nodeId = route.Value;
            var routeKey = (driverId, driverRef);

            if (!_nodeIdByDriverRef.TryGetValue(routeKey, out var nodeIds))
            {
                nodeIds = new HashSet<string>(StringComparer.Ordinal);
                _nodeIdByDriverRef[routeKey] = nodeIds;
            }

            nodeIds.Add(nodeId);
            _driverRefByNodeId[nodeId] = routeKey;
        }

        _opcUaPublishActor?.Tell(new ZB.MOM.WW.OtOpcUa.Runtime.OpcUa.OpcUaPublishActor.MaterialiseDiscoveredNodes(
            pair.Key, plan.Folders, plan.Variables));
        injectedCount += plan.Variables.Count;
    }

    // Phase 2 — one subscription push for the whole driver.
    if (!_children.TryGetValue(driverId, out var child))
    {
        return;
    }

    // With no composition both authored sets stay empty. HandleDiscoveredNodes has already ruled that
    // out, but the redeploy tail calls this method as well, so the guard keeps that path NRE-free.
    IEnumerable<string> authoredValueRefs = Enumerable.Empty<string>();
    string[] alarmRefs = Array.Empty<string>();
    if (_lastComposition is { } composition)
    {
        var driverTags = composition.EquipmentTags
            .Where(t => string.Equals(t.DriverInstanceId, driverId, StringComparison.Ordinal))
            .ToList();

        authoredValueRefs = driverTags
            .Where(t => t.Alarm is null)
            .Select(t => t.FullName);
        alarmRefs = driverTags
            .Where(t => t.Alarm is not null)
            .Select(t => t.FullName)
            .Distinct(StringComparer.Ordinal)
            .ToArray();
    }

    // Authored value refs first, then every plan's discovered refs; Union keeps first occurrences only.
    var discoveredRefs = plansByEquipment.Values.SelectMany(p => p.RoutingByRef.Keys);
    var desiredValueRefs = authoredValueRefs.Union(discoveredRefs, StringComparer.Ordinal).ToArray();

    child.Actor.Tell(new DriverInstanceActor.SetDesiredSubscriptions(desiredValueRefs, SubscriptionPublishingInterval, alarmRefs));
    _log.Info("DriverHost {Node}: injected {Count} discovered node(s) for driver {Driver} across {Equipment} equipment(s)",
        _localNode, injectedCount, driverId, plansByEquipment.Count);
}
/// <summary>
/// Routes a native alarm transition (published by a driver child as
/// <see cref="DriverInstanceActor.AttributeAlarmPublished"/>) to its materialised Part 9 condition
@@ -778,6 +1136,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
Receive<RouteNativeAlarmAck>(msg =>
_log.Debug("DriverHost {Node}: dropping native-alarm ack for {Node2} while Stale (config DB unreachable)",
_localNode, msg.ConditionNodeId));
// A driver child's post-connect DiscoveredNodesReady can't be injected while Stale (no composition is
// applied yet, so the equipment can't be resolved). Drop it — Task 6's re-discovery loop re-sends it,
// and the Task-8 post-recovery re-apply self-heals it once an apply runs (matches the no-op drops above).
Receive<DriverInstanceActor.DiscoveredNodesReady>(_ => { });
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
Timers.StartPeriodicTimer("retry-db", RetryConfigDbConnection.Instance, ReconnectInterval);
}
@@ -1044,13 +1406,35 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
_driverRefByNodeId[nodeId] = key;
}
var total = 0;
foreach (var (driverId, entry) in _children)
// Snapshot the cached (FixedTree-discovered) driver set BEFORE the bulk loop, while _discoveredByDriver
// is still untouched (the re-inject tail below drops/removes entries). Cached drivers are SKIPPED in the
// bulk loop because the tail sends each of them EXACTLY ONE SetDesiredSubscriptions for this pass: the
// authored+discovered union (ApplyDiscoveredPlansForDriver) for a survivor, or — if its plan is fully
// dropped — an authored-only fallback. Sending the bulk authored-only set HERE too would force the child
// to drop the whole handle (authored tags included) then re-subscribe — an extra unsub/resub blip of the
// authored values once per cached driver per redeploy. Net effect: exactly ONE send per driver per pass.
var cachedDriverIds = _discoveredByDriver.Keys.ToHashSet(StringComparer.Ordinal);
// One authored-only push (value refs + alarm refs from the maps built above), shared by the bulk loop AND
// the dropped-driver fallback so the two CANNOT drift: the fallback's correctness depends on sending the
// SAME payload the bulk loop would have, so it's a shared helper (structural), not a comment-maintained
// invariant. An EMPTY set is valid — the child's Connected handler routes it to Unsubscribe (dropping a
// stale handle) rather than a spurious subscribe. Returns the value-ref count for the bulk-loop log total.
int SendAuthoredOnly(IActorRef actor, string driverId)
{
var refs = refsByDriver.TryGetValue(driverId, out var r) ? r : Array.Empty<string>();
var alarmRefs = alarmRefsByDriver.TryGetValue(driverId, out var ar) ? ar : Array.Empty<string>();
entry.Actor.Tell(new DriverInstanceActor.SetDesiredSubscriptions(refs, SubscriptionPublishingInterval, alarmRefs));
total += refs.Count;
actor.Tell(new DriverInstanceActor.SetDesiredSubscriptions(refs, SubscriptionPublishingInterval, alarmRefs));
return refs.Count;
}
var total = 0;
foreach (var (driverId, entry) in _children)
{
// Cached drivers are owned exclusively by the re-inject tail (one send each) — skip here. Non-cached
// drivers keep the bulk authored-only send exactly as before.
if (cachedDriverIds.Contains(driverId)) continue;
total += SendAuthoredOnly(entry.Actor, driverId);
}
if (total > 0)
@@ -1085,6 +1469,95 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
_log.Info("DriverHost {Node}: applied {Count} Equipment ScriptedAlarm(s) to the ScriptedAlarm host",
_localNode, composition.EquipmentScriptedAlarms.Count);
}
// Cache the applied composition LAST so discovered-node injection (HandleDiscoveredNodes) can resolve
// the equipment bound to a driver + recompute the authored subscription sets when a driver later
// reports its FixedTree. Set here (not in ApplyAndAck) so both the fresh-apply and bootstrap-restore
// paths — which both route through this method — leave a current composition.
_lastComposition = composition;
// Re-inject discovered (FixedTree) nodes after the authored rebuild. PushDesiredSubscriptions cleared
// _nodeIdByDriverRef and re-pushed authored-only subscriptions above; without this, an IN-PROCESS
// redeploy / re-apply (one that runs while the host is alive, so _discoveredByDriver is populated)
// would drop the injected FixedTree routes + materialised nodes until the driver happens to reconnect
// and re-discover. This loop is INERT on the bootstrap-restore path (RestoreApplied): there the actor
// is freshly constructed so _discoveredByDriver is empty — restart survival comes from Task 6's
// post-connect re-discovery, NOT this re-apply. Re-resolve each cached driver's candidate equipments
// from the CURRENT composition (the SAME EquipmentNodes-UNION-EquipmentTags logic HandleDiscoveredNodes
// uses), then validate each cached (equipmentId → plan) entry PER ENTRY: drop the entry if its
// equipmentId is no longer a resolved candidate for the driver, OR the plan's NodeIds aren't scoped to
// that equipmentId (a rebind). A driver whose inner map empties out is removed entirely. The surviving
// entries are re-applied via the single-send-per-driver structure. (The single-equipment case today has
// exactly one inner entry; the multi-device task adds more.)
foreach (var driverId in _discoveredByDriver.Keys.ToList()) // snapshot — we mutate the dict below
{
var fromNodes = composition.EquipmentNodes
.Where(e => e.DriverInstanceId is not null && string.Equals(e.DriverInstanceId, driverId, StringComparison.Ordinal))
.Select(e => e.EquipmentId);
var fromTags = composition.EquipmentTags
.Where(t => string.Equals(t.DriverInstanceId, driverId, StringComparison.Ordinal))
.Select(t => t.EquipmentId);
var candidates = fromNodes.Concat(fromTags).ToHashSet(StringComparer.Ordinal);
var plansByEquipment = _discoveredByDriver[driverId];
// Track whether ANY entry was dropped (no-longer-candidate or rebind) so we can re-trigger this
// driver's discovery exactly ONCE after the inner map is processed (see the post-loop block).
var droppedAny = false;
foreach (var equipmentId in plansByEquipment.Keys.ToList()) // snapshot — we mutate the inner dict
{
var plan = plansByEquipment[equipmentId];
if (!candidates.Contains(equipmentId))
{
plansByEquipment.Remove(equipmentId);
droppedAny = true;
_log.Debug("DriverHost {Node}: dropped cached discovered nodes for {Driver}/{Equipment} — equipment no longer resolves", _localNode, driverId, equipmentId);
continue;
}
// If the equipment was rebound (the cached plan's NodeIds are scoped to the OLD equipment), drop +
// let re-discovery rebuild against the new equipment. The plan's NodeIds are "{equipmentId}/...".
var planEquipmentConsistent = plan.Variables.Count > 0
&& plan.Variables[0].NodeId.StartsWith(equipmentId + "/", StringComparison.Ordinal);
if (!planEquipmentConsistent)
{
plansByEquipment.Remove(equipmentId);
droppedAny = true;
_log.Debug("DriverHost {Node}: dropped cached discovered nodes for {Driver}/{Equipment} — equipment rebound", _localNode, driverId, equipmentId);
}
}
// Re-trigger discovery when ANY entry was dropped (no-longer-candidate or rebind). A CONFIG-UNCHANGED
// rebind (the driver's DriverConfig is identical, only its authored tag's EquipmentId moved) is NOT
// restarted by ReconcileDrivers — the child stays Connected — so without this nudge the FixedTree
// subtree would stay ABSENT under the new equipment until the driver's next natural reconnect. We now
// ask the child to re-run discovery so it re-grafts promptly: the next pass resolves against the new
// _lastComposition (the now-bound equipment). This is a DISCOVERY action, not lifecycle control — no
// stop/restart; it is idempotent, and the child no-ops it if not Connected (handled in
// DriverInstanceActor). Sent at most ONCE per driver per re-inject pass (here, after the inner map is
// processed — so even when the inner map empties below), guarded on the child still existing.
if (droppedAny && _children.TryGetValue(driverId, out var rediscoverEntry))
rediscoverEntry.Actor.Tell(new DriverInstanceActor.TriggerRediscovery());
if (plansByEquipment.Count == 0)
{
_discoveredByDriver.Remove(driverId);
// Drop the driver's partition warn-signature too so a permanently-removed/rebound driver doesn't
// leak a stale entry (log-level-only state; bounded by driver count — just tidiness).
_lastPartitionWarnSignature.Remove(driverId);
// FALLBACK (one-send invariant): this driver was SKIPPED in the bulk loop (it was cached), and its
// plan is now FULLY DROPPED — so ApplyDiscoveredPlansForDriver won't run for it and it would
// otherwise receive ZERO sends this pass, losing its AUTHORED subscriptions. Send the authored-only
// set NOW (the SAME payload the bulk loop computes), so the authored tags subscribe in THIS pass.
// (The TriggerRediscovery above handles the async FixedTree re-graft separately; this just keeps
// the authored values live meanwhile.) Guarded on the child still existing — a driver removed by
// ReconcileDrivers has no child and correctly gets no send. Shares SendAuthoredOnly with the bulk
// loop so the payload can't drift; a ZERO-authored driver sends an empty set → Unsubscribe (drops
// the stale FixedTree handle without a spurious subscribe).
if (_children.TryGetValue(driverId, out var fallbackEntry))
SendAuthoredOnly(fallbackEntry.Actor, driverId);
continue;
}
ApplyDiscoveredPlansForDriver(driverId, plansByEquipment);
}
}
private void SpawnChild(DriverInstanceSpec spec)
@@ -32,6 +32,16 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
{
public static readonly TimeSpan DefaultReconnectInterval = TimeSpan.FromSeconds(10);
/// <summary>Default interval between bounded post-connect re-discovery passes.</summary>
public static readonly TimeSpan DefaultRediscoverInterval = TimeSpan.FromSeconds(2);
/// <summary>Default cap on the number of post-connect re-discovery passes.</summary>
public const int DefaultRediscoverMaxAttempts = 15;
/// <summary>Default per-pass timeout for <see cref="ITagDiscovery.DiscoverAsync"/> during
/// bounded post-connect re-discovery. Bounds the mailbox suspension time; production default 30 s.</summary>
public static readonly TimeSpan DefaultRediscoverDiscoverTimeout = TimeSpan.FromSeconds(30);
public sealed record InitializeRequested(string DriverConfigJson);
public sealed record InitializeSucceeded(int Generation);
public sealed record InitializeFailed(string Reason, int Generation);
@@ -98,6 +108,25 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
/// subscription that un-gates an <see cref="IAlarmSource"/> driver's feed. Handled async so the
/// <see cref="IAlarmSource.SubscribeAlarmsAsync"/> call is bounded + off the synchronous handlers.</summary>
private sealed record SubscribeAlarms;
/// <summary>Published to the parent (DriverHostActor) after each post-connect discovery pass so it can
/// graft the driver's discovered FixedTree nodes under the equipment. Empty/duplicate sets are fine —
/// the parent dedups and injection is idempotent.</summary>
public sealed record DiscoveredNodesReady(string DriverInstanceId, IReadOnlyList<DiscoveredNode> Nodes);
/// <summary>
/// Sent by <see cref="DriverHostActor"/> to ask this driver child to re-run post-connect discovery
/// after the host rebinds the driver to a new equipment. Handled only in <c>Connected</c>, where it
/// re-kicks <see cref="StartDiscovery"/> — which already honours the driver's
/// <see cref="ITagDiscovery.RediscoverPolicy"/> and the <see cref="ITagDiscovery"/> guard, tagging the
/// fresh pass with the current init generation. In any non-Connected state it is a deliberate no-op:
/// the driver's eventual (re)connect re-discovers anyway, so there is nothing to do and nothing to log.
/// </summary>
public sealed record TriggerRediscovery;
/// <summary>Internal self-tick driving bounded post-connect re-discovery (FixedTree populates ~0-2s after connect).
/// <paramref name="PreviousSignature"/> is the ordered-distinct full-reference signature of the prior pass's
/// captured set (empty string on the first tick); re-discovery stops once a non-empty set repeats it.</summary>
private sealed record RediscoverTick(int Generation, int Attempt, string PreviousSignature);
public sealed class RetryConnect
{
public static readonly RetryConnect Instance = new();
@@ -112,6 +141,19 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
private readonly string _clusterId;
private readonly IDriverHealthPublisher _healthPublisher;
private readonly TimeSpan _reconnectInterval;
/// <summary>Interval between bounded post-connect re-discovery passes. Production default 2s; tests
/// inject a tiny value so the loop runs without real-time waits.</summary>
private readonly TimeSpan _rediscoverInterval;
/// <summary>Cap on the number of post-connect re-discovery passes — a backstop so a never-stabilising
/// (or perpetually-empty) discovered set cannot spin the loop forever. Production default 15.</summary>
private readonly int _rediscoverMaxAttempts;
/// <summary>Per-pass timeout for <see cref="ITagDiscovery.DiscoverAsync"/> during bounded post-connect
/// re-discovery. Bounds the mailbox suspension time. Production default 30 s; tests may inject a shorter
/// value. Stored to allow injection rather than hardcoding.</summary>
private readonly TimeSpan _rediscoverDiscoverTimeout;
private readonly ILoggingAdapter _log = Context.GetLogger();
private string? _currentConfigJson;
@@ -167,18 +209,27 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
/// stub paths don't need to provide one.</param>
/// <param name="clusterId">Optional cluster identifier forwarded in <see cref="DriverHealthChanged"/> messages;
/// defaults to an empty string when not provided (e.g. in unit tests).</param>
/// <param name="rediscoverInterval">Optional interval between post-connect re-discovery passes; defaults to 2 seconds.</param>
/// <param name="rediscoverMaxAttempts">Optional cap on re-discovery passes; defaults to 15.</param>
/// <param name="rediscoverDiscoverTimeout">Optional per-pass timeout for <see cref="ITagDiscovery.DiscoverAsync"/>; defaults to 30 seconds.</param>
public static Props Props(
IDriver driver,
TimeSpan? reconnectInterval = null,
bool startStubbed = false,
IDriverHealthPublisher? healthPublisher = null,
string? clusterId = null) =>
string? clusterId = null,
TimeSpan? rediscoverInterval = null,
int rediscoverMaxAttempts = DefaultRediscoverMaxAttempts,
TimeSpan? rediscoverDiscoverTimeout = null) =>
Akka.Actor.Props.Create(() => new DriverInstanceActor(
driver,
reconnectInterval ?? DefaultReconnectInterval,
startStubbed,
healthPublisher ?? NullDriverHealthPublisher.Instance,
clusterId ?? string.Empty));
clusterId ?? string.Empty,
rediscoverInterval,
rediscoverMaxAttempts,
rediscoverDiscoverTimeout));
/// <summary>
/// Returns true when the driver should boot in DEV-STUB mode based on host platform and
@@ -210,18 +261,27 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
/// <param name="startStubbed">If true, start in stub mode for testing or unavailable platforms.</param>
/// <param name="healthPublisher">Sink for health-change notifications; must not be null.</param>
/// <param name="clusterId">Cluster identifier forwarded in health snapshots.</param>
/// <param name="rediscoverInterval">Interval between post-connect re-discovery passes; defaults to 2 seconds.</param>
/// <param name="rediscoverMaxAttempts">Cap on the number of re-discovery passes; defaults to 15.</param>
/// <param name="rediscoverDiscoverTimeout">Per-pass timeout for <see cref="ITagDiscovery.DiscoverAsync"/>; defaults to 30 seconds.</param>
public DriverInstanceActor(
IDriver driver,
TimeSpan reconnectInterval,
bool startStubbed = false,
IDriverHealthPublisher? healthPublisher = null,
string? clusterId = null)
string? clusterId = null,
TimeSpan? rediscoverInterval = null,
int rediscoverMaxAttempts = DefaultRediscoverMaxAttempts,
TimeSpan? rediscoverDiscoverTimeout = null)
{
_driver = driver;
_driverInstanceId = driver.DriverInstanceId;
_clusterId = clusterId ?? string.Empty;
_healthPublisher = healthPublisher ?? NullDriverHealthPublisher.Instance;
_reconnectInterval = reconnectInterval;
_rediscoverInterval = rediscoverInterval ?? DefaultRediscoverInterval;
_rediscoverMaxAttempts = rediscoverMaxAttempts;
_rediscoverDiscoverTimeout = rediscoverDiscoverTimeout ?? DefaultRediscoverDiscoverTimeout;
OtOpcUaTelemetry.DriverInstanceLifecycle.Add(1,
new KeyValuePair<string, object?>("event", startStubbed ? "spawn_stub" : "spawn"),
new KeyValuePair<string, object?>("driver_type", driver.DriverType));
@@ -259,6 +319,11 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
Receive<DisconnectObserved>(_ => { /* stubbed drivers don't disconnect */ });
Receive<ForceReconnect>(_ => { /* stubbed drivers don't reconnect */ });
Receive<SetDesiredSubscriptions>(StoreDesiredSubscriptions);
// Stubbed drivers never enter Connected, so they never kick discovery; swallow defensively in case a
// re-discovery self-tick is ever routed here so it doesn't surface as an Akka Unhandled message.
Receive<RediscoverTick>(_ => { });
// A TriggerRediscovery is meaningless to a stubbed (never-Connected) driver — silently ignore it.
Receive<TriggerRediscovery>(_ => { });
Receive<HealthPollTick>(_ => PublishHealthSnapshot());
}
@@ -284,6 +349,7 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
ResubscribeDesired();
AttachAlarmSource();
SubscribeDesiredAlarms();
StartDiscovery();
});
Receive<InitializeFailed>(msg =>
{
@@ -311,6 +377,12 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
// A SubscribeAlarms self-tell (from Connected) can be overtaken by an already-queued disconnect into
// this state; swallow it so it doesn't dead-letter — the next Connected entry re-subscribes.
Receive<SubscribeAlarms>(_ => { });
// Likewise the attempt-0 re-discovery self-tick (sent on Connected entry) can be overtaken by an
// already-queued disconnect; swallow it — the next Connected entry re-kicks discovery.
Receive<RediscoverTick>(_ => { });
// A TriggerRediscovery arriving while not Connected is a deliberate no-op — the (re)connect path
// re-runs discovery anyway. Swallow it so it stays a clean silent no-op (no Unhandled event).
Receive<TriggerRediscovery>(_ => { });
Receive<HealthPollTick>(_ => PublishHealthSnapshot());
}
@@ -321,6 +393,7 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
{
_log.Warning("DriverInstance {Id}: disconnect observed ({Reason}); reconnecting",
_driverInstanceId, msg.Reason);
Timers.Cancel("rediscover");
DetachSubscription();
RecordFault();
Become(Reconnecting);
@@ -329,10 +402,25 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
Receive<ForceReconnect>(_ =>
{
_log.Info("DriverInstance {Id}: ForceReconnect requested by admin; re-entering Reconnecting", _driverInstanceId);
Timers.Cancel("rediscover");
DetachSubscription();
Become(Reconnecting);
PublishHealthSnapshot();
});
ReceiveAsync<RediscoverTick>(HandleRediscoverAsync);
// The host asks for a fresh discovery pass after rebinding the driver to a new equipment. Cancel any
// pending rediscover tick FIRST — mirroring ForceReconnect/DisconnectObserved — so a stale tick left
// over from the prior loop can't fire alongside the freshly-kicked one, then re-kick the bounded loop
// via StartDiscovery (honours RediscoverPolicy + the ITagDiscovery guard, tagged with the current
// _initGeneration). Only handled here in Connected — non-Connected states no-op it below. A stale tick
// that still slips through (one already mid-async-handler) is benign: the parent dedups
// DiscoveredNodesReady and node injection is idempotent — the Cancel just avoids the avoidable double
// pass in the common case.
Receive<TriggerRediscovery>(_ =>
{
Timers.Cancel("rediscover");
StartDiscovery();
});
ReceiveAsync<WriteAttribute>(HandleWriteAsync);
ReceiveAsync<RouteAlarmAck>(HandleAcknowledgeAsync);
ReceiveAsync<Subscribe>(HandleSubscribeAsync);
@@ -390,6 +478,7 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
ResubscribeDesired();
AttachAlarmSource();
SubscribeDesiredAlarms();
StartDiscovery(); // re-run discovery on reconnect — keeps the injected tree fresh if the backend's capabilities changed
});
// A failure here is a no-op regardless of generation — the retry timer keeps trying the
// current config; only a (generation-matched) InitializeSucceeded transitions state.
@@ -412,6 +501,12 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
// A SubscribeAlarms self-tell (from Connected) can be overtaken by an already-queued disconnect into
// this state; swallow it so it doesn't dead-letter — the next Connected entry re-subscribes.
Receive<SubscribeAlarms>(_ => { });
// Likewise the attempt-0 re-discovery self-tick (sent on Connected entry) can be overtaken by an
// already-queued disconnect; swallow it — the next Connected entry re-kicks discovery.
Receive<RediscoverTick>(_ => { });
// A TriggerRediscovery arriving while not Connected is a deliberate no-op — the (re)connect path
// re-runs discovery anyway. Swallow it so it stays a clean silent no-op (no Unhandled event).
Receive<TriggerRediscovery>(_ => { });
Receive<HealthPollTick>(_ => PublishHealthSnapshot());
Timers.StartPeriodicTimer("retry-connect", RetryConnect.Instance, _reconnectInterval);
}
@@ -677,6 +772,92 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
}
}
/// <summary>Kick the bounded post-connect re-discovery loop on a <c>Connected</c> entry. A no-op unless the
/// driver exposes <see cref="ITagDiscovery"/> (nothing to inject otherwise). Self-sends the first
/// <see cref="RediscoverTick"/> tagged with the current init generation so a tick that outlives a reconnect
/// is rejected by the generation guard in <see cref="HandleRediscoverAsync"/>.
/// <para>Honours the driver's <see cref="ITagDiscovery.RediscoverPolicy"/>: <c>Never</c> opts out entirely
/// (no tick scheduled); <c>Once</c> runs a single pass (the loop stops after the first publish in
/// <see cref="HandleRediscoverAsync"/>); <c>UntilStable</c> retries each (re)connect, bounded by
/// stop-on-stable (the discovered-set signature repeats) + the attempt cap.</para></summary>
private void StartDiscovery()
{
    // Only drivers that expose ITagDiscovery have anything to inject; everything else is a no-op.
    if (_driver is not ITagDiscovery tagDiscovery)
    {
        return;
    }

    var optedOut = tagDiscovery.RediscoverPolicy == DiscoveryRediscoverPolicy.Never;
    if (!optedOut)
    {
        // Kick the loop with attempt 0 and an empty previous signature, stamped with the current init
        // generation so the handler can recognise (and reject) a tick that outlives a reconnect.
        Self.Tell(new RediscoverTick(_initGeneration, Attempt: 0, PreviousSignature: string.Empty));
        return;
    }

    // The driver opted out of post-connect discovery, so the first tick is never scheduled.
    _log.Debug("DriverInstance {Id}: RediscoverPolicy=Never — skipping post-connect discovery", _driverInstanceId);
}
/// <summary>Runs one post-connect discovery pass: captures the driver's streamed FixedTree via a
/// <see cref="CapturingAddressSpaceBuilder"/> and ships the result to the parent as
/// <see cref="DiscoveredNodesReady"/> (empty/duplicate sets are fine — the parent dedups and injection
/// is idempotent). Retries on the <see cref="_rediscoverInterval"/> until the non-empty discovered SET
/// has STABILISED (the ordered-distinct full-reference signature repeats — robust for incremental/paged
/// browsers where a count alone could falsely settle a partial tree) or the <see cref="_rediscoverMaxAttempts"/>
/// cap is hit, whichever comes first; keeps retrying while empty because a FOCAS-style FixedTree cache may
/// still be populating.
/// <para>Limitation: this assumes a driver's discovered set only GROWS toward a stable shape (true for
/// FOCAS — its FixedTree appears once, and on the wonder deploy the driver-config <c>_options.Tags</c> is
/// empty so the set is 0 until the cache populates). A driver that emits an initial non-empty set and
/// later grows could stop early on a transient repeat; acceptable for current scope.</para></summary>
private async Task HandleRediscoverAsync(RediscoverTick tick)
{
    // A tick stamped with an older init generation was superseded by a reconnect — ignore it.
    if (tick.Generation != _initGeneration)
    {
        return;
    }

    if (_driver is not ITagDiscovery tagDiscovery)
    {
        return;
    }

    IReadOnlyList<DiscoveredNode> discovered;
    try
    {
        var capture = new CapturingAddressSpaceBuilder();

        // The browse is time-boxed: ReceiveAsync suspends the mailbox for the whole handler, so an
        // unbounded DiscoverAsync would hold up disconnects, forced reconnects, writes and health polls.
        using var browseTimeout = new CancellationTokenSource(_rediscoverDiscoverTimeout);

        // Deliberately NO ConfigureAwait(false): the continuation must resume on the actor's scheduler so
        // the Context.Parent.Tell and Timers calls further down run with a live ActorContext. Resuming
        // off-context would throw NotSupportedException("no active ActorContext").
        await tagDiscovery.DiscoverAsync(capture, browseTimeout.Token);

        // Snapshot into an array so the builder's live list is never handed across actors.
        discovered = capture.Nodes.ToArray();
    }
    catch (Exception ex)
    {
        _log.Warning(ex, "DriverInstance {Id}: discovery pass {Attempt} failed; will retry", _driverInstanceId, tick.Attempt);
        discovered = Array.Empty<DiscoveredNode>();
    }

    // Defensive re-check after the await. With the mailbox suspended the generation should not have moved,
    // but the guard is kept in case that assumption ever stops holding.
    if (tick.Generation != _initGeneration)
    {
        return;
    }

    // Publish every pass — including empty or failed ones — to the parent.
    Context.Parent.Tell(new DiscoveredNodesReady(_driverInstanceId, discovered));

    // A Once driver is finished after its first published pass: no follow-up tick is scheduled.
    // (Never is filtered out in StartDiscovery; UntilStable continues to the logic below.)
    if (tagDiscovery.RediscoverPolicy == DiscoveryRediscoverPolicy.Once)
    {
        _log.Debug("DriverInstance {Id}: RediscoverPolicy=Once — single discovery pass, not scheduling another", _driverInstanceId);
        return;
    }

    // Signature of this pass: the distinct full references, ordinally sorted and joined with U+0001.
    var orderedRefs = discovered
        .Select(node => node.FullReference)
        .Distinct(StringComparer.Ordinal)
        .OrderBy(reference => reference, StringComparer.Ordinal);
    var currentSignature = string.Join('\u0001', orderedRefs);

    // Settled = a NON-EMPTY set whose signature matches the previous pass. An empty set never counts as
    // settled, so the loop keeps polling while nothing has been discovered yet.
    var hasStabilised = discovered.Count > 0
        && string.Equals(currentSignature, tick.PreviousSignature, StringComparison.Ordinal);

    var passesCompleted = tick.Attempt + 1;
    if (hasStabilised || passesCompleted >= _rediscoverMaxAttempts)
    {
        _log.Debug("DriverInstance {Id}: discovery settled after {Attempt} pass(es), {Count} node(s)", _driverInstanceId, passesCompleted, discovered.Count);
        return;
    }

    // Not settled and still under the attempt cap: schedule the next pass, carrying this pass's signature.
    Timers.StartSingleTimer("rediscover", new RediscoverTick(tick.Generation, passesCompleted, currentSignature), _rediscoverInterval);
}
/// <summary>Records the host's desired subscription set without touching the live subscription.
/// The set is (re)applied by <see cref="ResubscribeDesired"/> on the next <c>Connected</c> entry.</summary>
private void StoreDesiredSubscriptions(SetDesiredSubscriptions msg)
@@ -56,6 +56,16 @@ public sealed class OpcUaPublishActor : ReceiveActor, IWithTimers
/// fall back to the latest sealed deployment (lags a not-yet-sealed apply by one revision).
/// </summary>
public sealed record RebuildAddressSpace(CorrelationId Correlation, DeploymentId? DeploymentId = null);
/// <summary>Inject driver-discovered nodes (FixedTree) under an equipment at runtime (post-connect).</summary>
/// <param name="EquipmentRootNodeId">The OPC UA NodeId of the equipment root folder to inject the
/// discovered nodes under (e.g. "EQ-3686c0272279"); also the node the NodeAdded model-change is
/// announced under.</param>
/// <param name="Folders">The discovered folders to inject; passed through unchanged to the applier by
/// <c>HandleMaterialiseDiscovered</c>.</param>
/// <param name="Variables">The discovered variables to inject; passed through unchanged to the applier by
/// <c>HandleMaterialiseDiscovered</c>.</param>
public sealed record MaterialiseDiscoveredNodes(
string EquipmentRootNodeId,
IReadOnlyList<DiscoveredFolder> Folders,
IReadOnlyList<DiscoveredVariable> Variables);
public sealed record ServiceLevelChanged(byte ServiceLevel);
private readonly IOpcUaAddressSpaceSink _sink;
@@ -217,6 +227,7 @@ public sealed class OpcUaPublishActor : ReceiveActor, IWithTimers
Receive<AttributeValueUpdate>(HandleAttributeUpdate);
Receive<AlarmStateUpdate>(HandleAlarmUpdate);
Receive<RebuildAddressSpace>(HandleRebuild);
Receive<MaterialiseDiscoveredNodes>(HandleMaterialiseDiscovered);
Receive<ServiceLevelChanged>(HandleServiceLevelChanged);
Receive<RedundancyStateChanged>(HandleRedundancyStateChanged);
Receive<DbHealthProbeActor.DbHealthStatus>(HandleDbHealthStatus);
@@ -390,6 +401,19 @@ public sealed class OpcUaPublishActor : ReceiveActor, IWithTimers
}
}
/// <summary>Forwards driver-discovered (FixedTree) nodes to the applier so they are injected under
/// the equipment at runtime. No-op (logged) when no applier is wired (dev/Mac/legacy seam), matching the
/// optional-applier tolerance of <see cref="HandleRebuild"/>.</summary>
private void HandleMaterialiseDiscovered(MaterialiseDiscoveredNodes msg)
{
    // Normal path: an applier is wired, so hand it the equipment root plus the discovered folders/variables.
    if (_applier is not null)
    {
        _applier.MaterialiseDiscoveredNodes(msg.EquipmentRootNodeId, msg.Folders, msg.Variables);
        return;
    }

    // No applier wired: the request is dropped, leaving only a debug trace.
    _log.Debug("OpcUaPublish: no applier wired — discarding MaterialiseDiscoveredNodes for {Equipment}", msg.EquipmentRootNodeId);
}
private void HandleServiceLevelChanged(ServiceLevelChanged msg)
{
// Always publish the FIRST computed level, even if it equals the byte-default 0. Otherwise a
@@ -156,6 +156,7 @@ public class DeferredAddressSpaceSinkTests
public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName) { }
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
public void RebuildAddressSpace() => RebuildCalled = true;
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
private sealed class SpySurgicalSink : IOpcUaAddressSpaceSink, ISurgicalAddressSpaceSink
@@ -169,6 +170,7 @@ public class DeferredAddressSpaceSinkTests
public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName) { }
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <summary>No-op: rebuild requests are ignored by this sink.</summary>
public void RebuildAddressSpace() { }
/// <summary>No-op: NodeAdded model-change announcements are ignored by this sink.</summary>
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
public bool UpdateTagAttributes(string variableNodeId, bool writable, string? historianTagname, string dataType, bool isArray, uint? arrayLength)
{
@@ -18,6 +18,17 @@ public sealed class AbLegacyDriverTests
drv.DriverInstanceId.ShouldBe("drv-1");
}
/// <summary>
/// AbLegacy must advertise the run-once post-connect re-discovery policy: its complete node set is
/// discovered synchronously from config, with no FOCAS-style background cache fill to wait for.
/// </summary>
[Fact]
public void RediscoverPolicy_is_Once()
    => new AbLegacyDriver(new AbLegacyDriverOptions(), "drv-1")
        .RediscoverPolicy
        .ShouldBe(DiscoveryRediscoverPolicy.Once);
/// <summary>Verifies that InitializeAsync with devices assigns family profiles.</summary>
[Fact]
public async Task InitializeAsync_with_devices_assigns_family_profiles()
@@ -70,9 +70,10 @@ public sealed class FocasDriverProbeTests
[Fact]
public async Task MalformedHostAddress_Returns_OkFalse_WithNoHostPortMessage()
{
// "not-a-focas-url" is not a focas:// URL — TryParse returns null.
// A foreign URI scheme ("http://…") is rejected by TryParse → null. (A bare
// "{ip}[:{port}]" without a scheme is now tolerated, so it can't be the malformed case.)
var result = await Probe.ProbeAsync(
"{\"devices\":[{\"hostAddress\":\"not-a-focas-url\"}]}",
"{\"devices\":[{\"hostAddress\":\"http://10.0.0.5/\"}]}",
TimeSpan.FromSeconds(3),
TestContext.Current.CancellationToken);
@@ -38,6 +38,25 @@ public sealed class FocasFactoryConfigTests
drv.Options.FixedTree.TimerPollInterval.ShouldBe(TimeSpan.FromSeconds(30));
}
/// <summary>
/// Regression for the 2026-06-26 wonder data-plane deploy, where the driver stubbed on
/// "Cannot get the value of a token type 'Number' as a string". The AdminUI persists FocasCncSeries as
/// its integer value (e.g. <c>"series":6</c> = Thirty_i), i.e. a bare JSON number; the factory must
/// tolerate that (via FlexibleStringConverter) and build the real driver instead of throwing and
/// falling back to a stub.
/// </summary>
[Fact]
public void CreateInstance_accepts_numeric_Series_from_AdminUI_serialization()
{
    const string adminUiJson = """
        {"Backend":"wire","series":6,"devices":[{"hostAddress":"10.0.0.5:8193","deviceName":"Makino","series":6,"positionDecimalPlaces":0}]}
        """;

    var driver = FocasDriverFactoryExtensions.CreateInstance("drv-1", adminUiJson);

    // Exactly one device must come through, carrying the enum decoded from the numeric value.
    var device = driver.Options.Devices.ShouldHaveSingleItem();
    device.Series.ShouldBe(FocasCncSeries.Thirty_i);
}
/// <summary>Verifies that the AlarmProjection configuration section is mapped to driver options.</summary>
[Fact]
public void CreateInstance_maps_AlarmProjection_section_onto_options()
@@ -0,0 +1,207 @@
using System.Diagnostics;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
namespace ZB.MOM.WW.OtOpcUa.Driver.FOCAS.Tests;
/// <summary>
/// Coverage for the FOCAS data-plane fix (2026-06-25 equipment-tag investigation): all wire I/O
/// on a device's single FOCAS/2 socket must be serialized (request→response cannot interleave)
/// and every steady-state read/write must be time-bounded so a stalled CNC read surfaces as a
/// recoverable error instead of hanging forever at BadWaitingForInitialData. See
/// <c>docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md</c>.
/// </summary>
[Trait("Category", "Unit")]
public sealed class FocasIoSerializationTests
{
private static readonly FocasAddress Macro500 = new(FocasAreaKind.Macro, null, 500, null);
// ---- SynchronizedFocasClient: serialization ----
/// <summary>
/// Starts eight reads at once through the synchronized decorator and checks that the inner client
/// received all eight but never saw more than one in flight.
/// </summary>
[Fact]
public async Task Concurrent_reads_are_serialized_onto_the_inner_client()
{
    var recorder = new RecordingClient { ReadDelay = TimeSpan.FromMilliseconds(20) };
    await using var guard = NoopDispose(recorder);
    var client = new SynchronizedFocasClient(recorder, TimeSpan.FromSeconds(5));

    var pending = Enumerable.Range(0, 8)
        .Select(_ => client.ReadAsync(Macro500, FocasDataType.Float64, CancellationToken.None))
        .ToArray();
    await Task.WhenAll(pending);

    // One socket per device: at most one wire operation may ever be in flight.
    recorder.MaxConcurrency.ShouldBe(1);
    recorder.ReadCount.ShouldBe(8);
}
// ---- SynchronizedFocasClient: per-call timeout ----
/// <summary>
/// A read that never completes on the inner client must end in an
/// <see cref="OperationCanceledException"/> well within two seconds when the decorator is given a
/// 100 ms call timeout.
/// </summary>
[Fact]
public async Task A_hung_read_is_bounded_by_the_call_timeout()
{
    var stalled = new RecordingClient { BlockReadUntilCancelled = true };
    var client = new SynchronizedFocasClient(stalled, TimeSpan.FromMilliseconds(100));
    Func<Task> hungRead = () => client.ReadAsync(Macro500, FocasDataType.Float64, CancellationToken.None);

    var timer = Stopwatch.StartNew();
    await Should.ThrowAsync<OperationCanceledException>(hungRead);
    timer.Stop();

    // Bounded by the call timeout, not by the indefinite OS TCP wait.
    timer.Elapsed.ShouldBeLessThan(TimeSpan.FromSeconds(2));
}
/// <summary>
/// After one read times out, the next read through the same decorator must still be served: a single
/// stall may not wedge every later operation on the device.
/// </summary>
[Fact]
public async Task A_hung_read_does_not_hold_the_socket_for_the_next_call()
{
    var inner = new TimeoutThenServeClient { FirstCallBlocks = true };
    var client = new SynchronizedFocasClient(inner, TimeSpan.FromMilliseconds(100));

    // Read #1 hangs on the inner client and is ended by the call timeout.
    Func<Task> firstRead = () => client.ReadAsync(Macro500, FocasDataType.Float64, CancellationToken.None);
    await Should.ThrowAsync<OperationCanceledException>(firstRead);

    // Read #2 only succeeds if the gate was released when read #1 timed out.
    var (payload, quality) = await client.ReadAsync(Macro500, FocasDataType.Float64, CancellationToken.None);
    quality.ShouldBe(FocasStatusMapper.Good);
    payload.ShouldBe(42);
}
/// <summary>
/// A probe that takes 200 ms must still succeed through a decorator whose call timeout is only 50 ms:
/// Connect/Probe carry their own budgets and must not be shrunk to the read budget.
/// </summary>
[Fact]
public async Task Probe_is_not_bounded_by_the_call_timeout()
{
    var slowProbe = new RecordingClient { ProbeDelay = TimeSpan.FromMilliseconds(200) };
    var client = new SynchronizedFocasClient(slowProbe, TimeSpan.FromMilliseconds(50));

    (await client.ProbeAsync(CancellationToken.None)).ShouldBeTrue();
}
/// <summary>
/// With a call timeout of <see cref="TimeSpan.Zero"/>, a read that takes 120 ms must complete normally
/// and return the inner client's value and Good status.
/// </summary>
[Fact]
public async Task Zero_call_timeout_disables_the_per_call_bound()
{
    var slowRead = new RecordingClient { ReadDelay = TimeSpan.FromMilliseconds(120) };
    var client = new SynchronizedFocasClient(slowRead, TimeSpan.Zero);

    var (payload, quality) = await client.ReadAsync(Macro500, FocasDataType.Float64, CancellationToken.None);

    quality.ShouldBe(FocasStatusMapper.Good);
    payload.ShouldBe(42);
}
/// <summary>Disposing the decorator must dispose the wrapped client exactly once.</summary>
[Fact]
public void Dispose_disposes_the_inner_client()
{
    var wrapped = new RecordingClient();
    var decorator = new SynchronizedFocasClient(wrapped, TimeSpan.FromSeconds(1));

    decorator.Dispose();

    wrapped.DisposeCount.ShouldBe(1);
}
// ---- Driver level: a timed-out read overwrites the seed with a recoverable status ----
/// <summary>
/// Driver-level check: with a 150 ms <c>Timeout</c> and a client whose reads never complete, a read
/// must come back within two seconds carrying <c>BadCommunicationError</c> for the tag.
/// </summary>
[Fact]
public async Task Driver_read_that_times_out_returns_BadCommunicationError_not_a_hang()
{
    var options = new FocasDriverOptions
    {
        Devices = [new FocasDeviceOptions("focas://10.0.0.5:8193")],
        Tags = [new FocasTagDefinition("CustomVar", "focas://10.0.0.5:8193", "MACRO:500", FocasDataType.Float64)],
        Probe = new FocasProbeOptions { Enabled = false },
        Timeout = TimeSpan.FromMilliseconds(150),
    };
    var factory = new FakeFocasClientFactory
    {
        Customise = () => new RecordingClient { BlockReadUntilCancelled = true },
    };
    var drv = new FocasDriver(options, "drv-1", factory);
    await drv.InitializeAsync("{}", CancellationToken.None);

    var timer = Stopwatch.StartNew();
    var snapshots = await drv.ReadAsync(["CustomVar"], CancellationToken.None);
    var snap = snapshots.Single();
    timer.Stop();

    snap.StatusCode.ShouldBe(FocasStatusMapper.BadCommunicationError);
    // Bounded by the configured Timeout rather than hanging.
    timer.Elapsed.ShouldBeLessThan(TimeSpan.FromSeconds(2));
}
/// <summary>
/// A per-call timeout must not escape the driver's <c>ReadAsync</c> as an
/// <see cref="OperationCanceledException"/> (that would abort the whole poll batch); it has to be
/// converted into a per-tag Bad status while the caller's token stays uncancelled.
/// </summary>
[Fact]
public async Task Driver_read_does_not_propagate_a_call_timeout_as_cancellation()
{
    var options = new FocasDriverOptions
    {
        Devices = [new FocasDeviceOptions("focas://10.0.0.5:8193")],
        Tags = [new FocasTagDefinition("CustomVar", "focas://10.0.0.5:8193", "MACRO:500", FocasDataType.Float64)],
        Probe = new FocasProbeOptions { Enabled = false },
        Timeout = TimeSpan.FromMilliseconds(120),
    };
    var factory = new FakeFocasClientFactory
    {
        Customise = () => new RecordingClient { BlockReadUntilCancelled = true },
    };
    var drv = new FocasDriver(options, "drv-1", factory);
    await drv.InitializeAsync("{}", CancellationToken.None);

    // Must complete without throwing, even though the caller's token is never cancelled.
    var snapshots = await drv.ReadAsync(["CustomVar"], CancellationToken.None);

    var snap = snapshots.Single();
    snap.StatusCode.ShouldBe(FocasStatusMapper.BadCommunicationError);
}
/// <summary>
/// Wraps <paramref name="d"/> in a <see cref="DisposeGuard"/> so it can be used with
/// <c>await using</c>. Despite the name, the guard does dispose <paramref name="d"/> when it is disposed.
/// </summary>
private static DisposeGuard NoopDispose(IDisposable d)
{
    return new DisposeGuard(d);
}
/// <summary>
/// Adapter exposing a synchronous <see cref="IDisposable"/> as <see cref="IAsyncDisposable"/>; the
/// wrapped instance is disposed synchronously inside <see cref="DisposeAsync"/>.
/// </summary>
private sealed class DisposeGuard : IAsyncDisposable
{
    private readonly IDisposable _inner;

    public DisposeGuard(IDisposable inner) => _inner = inner;

    /// <summary>Disposes the wrapped instance and returns an already-completed task.</summary>
    public ValueTask DisposeAsync()
    {
        _inner.Dispose();
        return ValueTask.CompletedTask;
    }
}
/// <summary>
/// Test fake that counts reads, tracks the highest number of reads observed in flight at the same time,
/// and can optionally delay or indefinitely block reads and delay probes.
/// </summary>
private class RecordingClient : FakeFocasClient
{
// Number of ReadAsync calls currently between entry and exit.
private int _current;
// Highest value _current has been observed at; only ever raised (see InterlockedMax).
public int MaxConcurrency;
// Total ReadAsync invocations, counted on entry (before any delay or block).
public int ReadCount;
// When positive (and BlockReadUntilCancelled is false), each read waits this long before returning.
public TimeSpan ReadDelay = TimeSpan.Zero;
// When true, each read waits on an infinite delay that can only end via the supplied token.
public bool BlockReadUntilCancelled;
// When positive, each probe waits this long before returning true.
public TimeSpan ProbeDelay = TimeSpan.Zero;
/// <summary>
/// Records the call, applies the configured block/delay, then returns the boxed value 42 with
/// <c>FocasStatusMapper.Good</c>. The in-flight counter is decremented in a <c>finally</c>, so it is
/// restored even when the delay is cancelled.
/// </summary>
public override async Task<(object? value, uint status)> ReadAsync(
FocasAddress address, FocasDataType type, CancellationToken ct)
{
Interlocked.Increment(ref ReadCount);
// Capture the in-flight count including this call and fold it into the high-water mark.
var observed = Interlocked.Increment(ref _current);
InterlockedMax(ref MaxConcurrency, observed);
try
{
// Blocking takes precedence over ReadDelay; both honour the caller-supplied token.
if (BlockReadUntilCancelled) await Task.Delay(Timeout.Infinite, ct).ConfigureAwait(false);
else if (ReadDelay > TimeSpan.Zero) await Task.Delay(ReadDelay, ct).ConfigureAwait(false);
return ((object?)42, FocasStatusMapper.Good);
}
finally { Interlocked.Decrement(ref _current); }
}
/// <summary>Waits <see cref="ProbeDelay"/> when it is positive, then reports success.</summary>
public override async Task<bool> ProbeAsync(CancellationToken ct)
{
if (ProbeDelay > TimeSpan.Zero) await Task.Delay(ProbeDelay, ct).ConfigureAwait(false);
return true;
}
/// <summary>
/// Raises <paramref name="target"/> to <paramref name="value"/> if that is larger, using a
/// compare-exchange retry loop; returns without writing when the stored value is already at least
/// <paramref name="value"/>.
/// </summary>
private static void InterlockedMax(ref int target, int value)
{
int seen;
// Retry until either the stored value is already >= value or our exchange wins.
do { seen = Volatile.Read(ref target); if (value <= seen) return; }
while (Interlocked.CompareExchange(ref target, value, seen) != seen);
}
}
/// <summary>
/// Fake whose very first read waits on an infinite delay (ending only via the token) when
/// <see cref="FirstCallBlocks"/> is set; every other read returns 42 with a Good status immediately.
/// </summary>
private sealed class TimeoutThenServeClient : FakeFocasClient
{
    public bool FirstCallBlocks;
    private int _readCalls;

    public override async Task<(object? value, uint status)> ReadAsync(
        FocasAddress address, FocasDataType type, CancellationToken ct)
    {
        // The counter is bumped on every call so only call #1 can ever block.
        var isFirstCall = Interlocked.Increment(ref _readCalls) == 1;
        if (isFirstCall && FirstCallBlocks)
        {
            await Task.Delay(Timeout.Infinite, ct).ConfigureAwait(false);
        }

        return ((object?)42, FocasStatusMapper.Good);
    }
}
}
@@ -245,6 +245,32 @@ public sealed class FocasReadWriteTests
/// <summary>
/// A read issued with an already-cancelled caller token, against a client that throws an
/// <see cref="OperationCanceledException"/> for that token, must surface the cancellation to the caller.
/// </summary>
[Fact]
public async Task Cancellation_propagates()
{
    var tag = new FocasTagDefinition("X", "focas://10.0.0.5:8193", "R100", FocasDataType.Byte);
    var (drv, factory) = NewDriver(tag);
    await drv.InitializeAsync("{}", CancellationToken.None);

    using var cts = new CancellationTokenSource();
    cts.Cancel();
    var cancelled = cts.Token;
    factory.Customise = () => new FakeFocasClient
    {
        ThrowOnRead = true,
        Exception = new OperationCanceledException(cancelled),
    };

    // Cancelling the CALLER'S token aborts the read and must propagate. That differs from a per-call
    // timeout: an OCE raised while the caller's token is still live is swallowed into a per-tag
    // BadCommunicationError (see Swallows_a_spurious_read_OCE_when_caller_not_cancelled).
    Func<Task> read = () => drv.ReadAsync(["X"], cancelled);
    await Should.ThrowAsync<OperationCanceledException>(read);
}
/// <summary>
/// An OperationCanceledException from the wire read while the CALLER'S token is NOT cancelled
/// (e.g. a per-call timeout firing) must be turned into a per-tag BadCommunicationError, not
/// propagated — otherwise one stalled tag would abort the whole poll batch.
/// </summary>
[Fact]
public async Task Swallows_a_spurious_read_OCE_when_caller_not_cancelled()
{
var (drv, factory) = NewDriver(
new FocasTagDefinition("X", "focas://10.0.0.5:8193", "R100", FocasDataType.Byte));
@@ -255,8 +281,8 @@ public sealed class FocasReadWriteTests
Exception = new OperationCanceledException(),
};
await Should.ThrowAsync<OperationCanceledException>(
() => drv.ReadAsync(["X"], CancellationToken.None));
var snap = (await drv.ReadAsync(["X"], CancellationToken.None)).Single();
snap.StatusCode.ShouldBe(FocasStatusMapper.BadCommunicationError);
}
/// <summary>Verifies that ShutdownAsync disposes the client.</summary>
@@ -20,6 +20,9 @@ public sealed class FocasScaffoldingTests
[InlineData("focas://cnc-01.factory.internal:8193", "cnc-01.factory.internal", 8193)]
[InlineData("focas://10.0.0.5:12345", "10.0.0.5", 12345)]
[InlineData("FOCAS://10.0.0.5:8193", "10.0.0.5", 8193)] // case-insensitive scheme
[InlineData("10.201.31.5:8193", "10.201.31.5", 8193)] // scheme-less (AdminUI-persisted form)
[InlineData("10.0.0.5", "10.0.0.5", 8193)] // scheme-less, default port
[InlineData("cnc-01.factory.internal:8193", "cnc-01.factory.internal", 8193)] // scheme-less hostname
public void HostAddress_parses_valid(string input, string host, int port)
{
var parsed = FocasHostAddress.TryParse(input);
@@ -200,6 +203,18 @@ public sealed class FocasScaffoldingTests
drv.DriverInstanceId.ShouldBe("drv-1");
}
/// <summary>
/// FOCAS must advertise the retry-until-stable post-connect re-discovery policy: its FixedTree subtree
/// is filled in asynchronously by a background loop a couple of seconds after connect, so a single
/// DiscoverAsync pass would miss it.
/// </summary>
[Fact]
public void RediscoverPolicy_is_UntilStable()
    => new FocasDriver(new FocasDriverOptions(), "drv-1")
        .RediscoverPolicy
        .ShouldBe(DiscoveryRediscoverPolicy.UntilStable);
/// <summary>Verifies InitializeAsync parses device addresses correctly.</summary>
[Fact]
public async Task InitializeAsync_parses_device_addresses()
@@ -224,9 +239,11 @@ public sealed class FocasScaffoldingTests
[Fact]
public async Task InitializeAsync_malformed_address_faults()
{
// A non-focas:// URI scheme is rejected by TryParse (a bare "{ip}[:{port}]" is now
// tolerated, so the malformed case must carry a foreign scheme).
var drv = new FocasDriver(new FocasDriverOptions
{
Devices = [new FocasDeviceOptions("not-an-address")],
Devices = [new FocasDeviceOptions("http://10.0.0.5/")],
}, "drv-1");
await Should.ThrowAsync<InvalidOperationException>(
@@ -4,6 +4,7 @@ using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.ControlPlane.AdminOperations;
using ZB.MOM.WW.OtOpcUa.ControlPlane.Tests.Harness;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Tests;
@@ -85,6 +86,58 @@ public sealed class ConfigComposerTests : ControlPlaneActorTestBase
artifact.RevisionHash.ShouldMatch("^[0-9a-f]{64}$");
}
/// <summary>
/// Exercises the real serialize→decode seam for follow-up E: a <see cref="Device"/> whose
/// <c>DeviceConfig</c> carries a <c>HostAddress</c> is written into the artifact blob by
/// <see cref="ConfigComposer.SnapshotAndFlattenAsync"/> and must come back out of
/// <see cref="DeploymentArtifact.ParseComposition(ReadOnlySpan{byte})"/> as the equipment's
/// <see cref="ZB.MOM.WW.OtOpcUa.OpcUaServer.EquipmentNode.DeviceHost"/>. Without this guard, a drift in
/// ConfigComposer's Device serialisation would silently null out DeviceHost in production (feature E
/// degrades to a warn-skip) while hand-rolled artifact tests stayed green.
/// </summary>
[Fact]
public async Task DeviceHost_survives_ConfigComposer_to_ParseComposition_round_trip()
{
    var dbFactory = NewInMemoryDbFactory();

    var equipmentNamespace = new Namespace
    {
        NamespaceId = "ns-eq", ClusterId = "c1",
        Kind = NamespaceKind.Equipment, NamespaceUri = "urn:eq",
    };
    var driver = new DriverInstance
    {
        DriverInstanceId = "drv-1", ClusterId = "c1", NamespaceId = "ns-eq",
        Name = "Focas", DriverType = "Focas", DriverConfig = "{}",
    };
    var device = new Device
    {
        DeviceId = "dev-1", DriverInstanceId = "drv-1",
        Name = "dev-1", DeviceConfig = "{\"HostAddress\":\"10.9.9.9:8193\"}",
    };
    var area = new UnsArea { UnsAreaId = "area-1", ClusterId = "c1", Name = "area-1" };
    var line = new UnsLine { UnsLineId = "line-1", UnsAreaId = "area-1", Name = "line-1" };
    var machine = new Equipment
    {
        EquipmentId = "eq-1", DriverInstanceId = "drv-1", DeviceId = "dev-1",
        UnsLineId = "line-1", Name = "machine-1", MachineCode = "MACHINE_001",
    };

    // Seed through one context and read back through a fresh one.
    await using (var seedDb = dbFactory.CreateDbContext())
    {
        seedDb.ServerClusters.Add(NewCluster("c1"));
        seedDb.Namespaces.Add(equipmentNamespace);
        seedDb.DriverInstances.Add(driver);
        seedDb.Devices.Add(device);
        seedDb.UnsAreas.Add(area);
        seedDb.UnsLines.Add(line);
        seedDb.Equipment.Add(machine);
        await seedDb.SaveChangesAsync();
    }

    await using var readDb = dbFactory.CreateDbContext();
    var artifact = await ConfigComposer.SnapshotAndFlattenAsync(readDb);
    var composition = DeploymentArtifact.ParseComposition(artifact.Blob);

    var node = composition.EquipmentNodes.ShouldHaveSingleItem();
    node.EquipmentId.ShouldBe("eq-1");
    node.DriverInstanceId.ShouldBe("drv-1");
    node.DeviceId.ShouldBe("dev-1");
    node.DeviceHost.ShouldBe("10.9.9.9:8193");
}
private static readonly DateTime FixedTimestamp = new(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
private static ServerCluster NewCluster(string id) => new()
@@ -22,6 +22,7 @@
<ItemGroup>
<ProjectReference Include="..\..\..\src\Server\ZB.MOM.WW.OtOpcUa.ControlPlane\ZB.MOM.WW.OtOpcUa.ControlPlane.csproj"/>
<ProjectReference Include="..\..\..\src\Server\ZB.MOM.WW.OtOpcUa.Runtime\ZB.MOM.WW.OtOpcUa.Runtime.csproj"/>
<ProjectReference Include="..\..\..\src\Core\ZB.MOM.WW.OtOpcUa.Configuration\ZB.MOM.WW.OtOpcUa.Configuration.csproj"/>
<ProjectReference Include="..\..\..\src\Core\ZB.MOM.WW.OtOpcUa.Commons\ZB.MOM.WW.OtOpcUa.Commons.csproj"/>
</ItemGroup>
@@ -333,5 +333,7 @@ public sealed class AddressSpaceApplierHierarchyTests : IDisposable
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <summary>Rebuilds the address space (stub implementation for testing).</summary>
public void RebuildAddressSpace() { }
/// <summary>Announces a NodeAdded model-change (stub implementation for testing).</summary>
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
}
@@ -567,6 +567,114 @@ public sealed class AddressSpaceApplierTests
.ShouldBe(("alm-1", "eq-1", "HighTemp", "OffNormalAlarm", 700, false));
}
/// <summary>Task 4 — MaterialiseDiscoveredNodes must ensure the discovered folders PARENT-FIRST (ordered
/// by depth = '/' count), ensure each discovered variable at its folder-scoped NodeId/parent as
/// READ-ONLY (writable == false), and then raise EXACTLY ONE NodeAdded model-change under the equipment
/// root. The folders are supplied child-first on purpose, so the test only passes if the applier
/// re-orders them (a child folder's parent has to exist first).</summary>
[Fact]
public void MaterialiseDiscoveredNodes_ensures_folders_parent_first_read_only_variables_and_raises_model_change_once()
{
    var sink = new RecordingSink();
    var applier = new AddressSpaceApplier(sink, NullLogger<AddressSpaceApplier>.Instance);

    // Child listed ahead of its parent — the reverse of the order the sink must see.
    DiscoveredFolder[] childFirstFolders =
    {
        new DiscoveredFolder("EQ-1/FOCAS/Identity", "EQ-1/FOCAS", "Identity"),
        new DiscoveredFolder("EQ-1/FOCAS", "EQ-1", "FOCAS"),
    };
    DiscoveredVariable[] discoveredVariables =
    {
        new DiscoveredVariable("EQ-1/FOCAS/Identity/SeriesNumber", "EQ-1/FOCAS/Identity", "SeriesNumber",
            "String", Writable: false, IsArray: false, ArrayLength: null),
    };

    applier.MaterialiseDiscoveredNodes("EQ-1", childFirstFolders, discoveredVariables);

    // Shallowest folder first, whatever the input order was.
    var ensuredFolderIds = sink.FolderCalls.Select(call => call.NodeId);
    ensuredFolderIds.ShouldBe(new[] { "EQ-1/FOCAS", "EQ-1/FOCAS/Identity" });
    sink.FolderCalls.ShouldContain(("EQ-1/FOCAS", "EQ-1", "FOCAS"));
    sink.FolderCalls.ShouldContain(("EQ-1/FOCAS/Identity", "EQ-1/FOCAS", "Identity"));

    // The variable sits under its sub-folder at the folder-scoped NodeId and is read-only.
    var variableCall = sink.VariableCalls.ShouldHaveSingleItem();
    variableCall.ShouldBe(("EQ-1/FOCAS/Identity/SeriesNumber", "EQ-1/FOCAS/Identity", "SeriesNumber", "String", false));

    // A single NodeAdded announcement, addressed to the equipment root.
    var announced = sink.ModelChangeCalls.ShouldHaveSingleItem();
    announced.ShouldBe("EQ-1");
}
/// <summary>Task 4 — a discovered array variable (rare) that arrives with <c>Writable: true</c> must be
/// ensured READ-ONLY (mirrors MaterialiseEquipmentTags: the driver write path can't handle arrays),
/// while its IsArray / ArrayLength flags reach the sink unchanged.</summary>
[Fact]
public void MaterialiseDiscoveredNodes_array_variable_is_forced_read_only()
{
    var sink = new RecordingSink();
    var applier = new AddressSpaceApplier(sink, NullLogger<AddressSpaceApplier>.Instance);
    DiscoveredVariable[] writableArray =
    {
        new DiscoveredVariable("EQ-1/FOCAS/Buffer", "EQ-1", "Buffer", "Int16",
            Writable: true, IsArray: true, ArrayLength: 8u),
    };

    applier.MaterialiseDiscoveredNodes("EQ-1", Array.Empty<DiscoveredFolder>(), writableArray);

    // Clamped to read-only even though the discovered variable asked for Writable: true.
    sink.VariableCalls.ShouldHaveSingleItem().Writable.ShouldBeFalse();

    var arrayCall = sink.ArrayCalls.ShouldHaveSingleItem();
    arrayCall.IsArray.ShouldBeTrue();
    arrayCall.ArrayLength.ShouldBe(8u);
}
/// <summary>Task 4 — applying the SAME discovered plan twice is idempotent-SAFE: the second call does
/// not throw, the distinct folder/variable set issued by the applier is unchanged (the real sink
/// early-returns on existing nodes), and a model-change is raised once PER call (two in total).</summary>
[Fact]
public void MaterialiseDiscoveredNodes_is_idempotent_safe_on_repeated_application()
{
    var sink = new RecordingSink();
    var applier = new AddressSpaceApplier(sink, NullLogger<AddressSpaceApplier>.Instance);
    DiscoveredFolder[] plannedFolders =
    {
        new DiscoveredFolder("EQ-1/FOCAS", "EQ-1", "FOCAS"),
        new DiscoveredFolder("EQ-1/FOCAS/Identity", "EQ-1/FOCAS", "Identity"),
    };
    DiscoveredVariable[] plannedVariables =
    {
        new DiscoveredVariable("EQ-1/FOCAS/Identity/SeriesNumber", "EQ-1/FOCAS/Identity", "SeriesNumber",
            "String", Writable: false, IsArray: false, ArrayLength: null),
    };

    applier.MaterialiseDiscoveredNodes("EQ-1", plannedFolders, plannedVariables);
    Should.NotThrow(() => applier.MaterialiseDiscoveredNodes("EQ-1", plannedFolders, plannedVariables));

    // Both passes issue the same parent-first ensures (the real sink dedups via early-return), so the
    // DISTINCT node ids recorded across the two passes are exactly the planned ones.
    var distinctFolderIds = sink.FolderCalls.Select(call => call.NodeId).Distinct();
    distinctFolderIds.ShouldBe(new[] { "EQ-1/FOCAS", "EQ-1/FOCAS/Identity" });
    var distinctVariableIds = sink.VariableCalls.Select(call => call.NodeId).Distinct();
    distinctVariableIds.ShouldBe(new[] { "EQ-1/FOCAS/Identity/SeriesNumber" });

    // One announcement per call, hence two for two calls.
    sink.ModelChangeCalls.ShouldBe(new[] { "EQ-1", "EQ-1" });
}
/// <summary>Task 4 — with no folders and no variables the applier must leave the sink untouched: no
/// EnsureFolder, no EnsureVariable and, crucially, NO NodeAdded model-change.</summary>
[Fact]
public void MaterialiseDiscoveredNodes_empty_input_does_not_touch_sink()
{
    var sink = new RecordingSink();
    var applier = new AddressSpaceApplier(sink, NullLogger<AddressSpaceApplier>.Instance);
    var noFolders = Array.Empty<DiscoveredFolder>();
    var noVariables = Array.Empty<DiscoveredVariable>();

    applier.MaterialiseDiscoveredNodes("EQ-1", noFolders, noVariables);

    sink.FolderCalls.ShouldBeEmpty();
    sink.VariableCalls.ShouldBeEmpty();
    sink.ModelChangeCalls.ShouldBeEmpty();
}
/// <summary>Verifies that added equipment tags in an otherwise-empty plan trigger an
/// address-space rebuild (the planner now diffs equipment tags, so a tags-only deploy is no
/// longer a silent no-op).</summary>
@@ -1761,6 +1869,14 @@ public sealed class AddressSpaceApplierTests
}
/// <summary>Records a rebuild address space call.</summary>
public void RebuildAddressSpace() => Interlocked.Increment(ref RebuildCalls);
/// <summary>Gets the queue of NodeAdded model-change announcements (discovered-node injection).</summary>
public ConcurrentQueue<string> ModelChangeQueue { get; } = new();
/// <summary>Gets the list of recorded NodeAdded model-change announcements (discovered-node injection).</summary>
public List<string> ModelChangeCalls => ModelChangeQueue.ToList();
/// <summary>Records a NodeAdded model-change announcement.</summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId) => ModelChangeQueue.Enqueue(affectedNodeId);
}
/// <summary>A recording sink that does NOT implement <see cref="ISurgicalAddressSpaceSink"/> — used to
@@ -1783,6 +1899,8 @@ public sealed class AddressSpaceApplierTests
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <summary>Records a rebuild address space call.</summary>
public void RebuildAddressSpace() => Interlocked.Increment(ref RebuildCalls);
/// <summary>No-op NodeAdded model-change announcement.</summary>
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
private sealed class ThrowingSink : IOpcUaAddressSpaceSink
@@ -1829,5 +1947,7 @@ public sealed class AddressSpaceApplierTests
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <summary>No-op rebuild address space call.</summary>
public void RebuildAddressSpace() { }
/// <summary>No-op NodeAdded model-change announcement.</summary>
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
}
@@ -0,0 +1,125 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
/// <summary>
/// Covers follow-up E projection: <see cref="EquipmentNode"/> carries the equipment's
/// <c>DriverInstanceId</c> / <c>DeviceId</c> bindings and the resolved <c>DeviceHost</c> (parsed from
/// the bound <see cref="Device"/>'s schemaless <c>DeviceConfig</c> JSON via the shared
/// <see cref="AddressSpaceComposer.TryExtractDeviceHost"/>). A later task grafts a driver's discovered
/// FixedTree onto a zero-tag equipment and partitions a multi-device driver by host using these.
/// </summary>
public sealed class AddressSpaceComposerDeviceHostTests
{
/// <summary>An equipment bound to a driver and to a device whose config carries a top-level
/// <c>HostAddress</c> must come out of composition with its driver id, device id and device host all
/// populated.</summary>
[Fact]
public void Compose_resolves_driver_device_and_device_host()
{
    Equipment[] equipment = { NewEquipment("eq-1", driver: "d1", device: "dev1") };
    Device[] devices = { NewDevice("dev1", "d1", "{\"HostAddress\":\"10.0.0.5:8193\"}") };

    var composition = AddressSpaceComposer.Compose(
        Array.Empty<UnsArea>(), Array.Empty<UnsLine>(), equipment,
        Array.Empty<DriverInstance>(), Array.Empty<ScriptedAlarm>(), devices: devices);

    var node = composition.EquipmentNodes.ShouldHaveSingleItem();
    node.EquipmentId.ShouldBe("eq-1");
    node.DriverInstanceId.ShouldBe("d1");
    node.DeviceId.ShouldBe("dev1");
    node.DeviceHost.ShouldBe("10.0.0.5:8193");
}
/// <summary>An equipment with neither a driver nor a device must yield null for all three binding
/// fields (driver-less, and no device to resolve a host from).</summary>
[Fact]
public void Compose_equipment_without_driver_or_device_yields_null_bindings()
{
    Equipment[] equipment = { NewEquipment("eq-1", driver: null, device: null) };

    var composition = AddressSpaceComposer.Compose(
        Array.Empty<UnsArea>(), Array.Empty<UnsLine>(), equipment,
        Array.Empty<DriverInstance>(), Array.Empty<ScriptedAlarm>(), devices: Array.Empty<Device>());

    var node = composition.EquipmentNodes.ShouldHaveSingleItem();
    node.DriverInstanceId.ShouldBeNull();
    node.DeviceId.ShouldBeNull();
    node.DeviceHost.ShouldBeNull();
}
/// <summary>When the bound DeviceId has no matching device row, or the matching device's config has no
/// <c>HostAddress</c>, DeviceHost must be null while the DeviceId itself is still carried through.</summary>
[Fact]
public void Compose_device_host_is_null_when_unresolvable()
{
    Equipment[] equipment =
    {
        NewEquipment("eq-missing", driver: "d1", device: "dev-missing"),
        NewEquipment("eq-nohost", driver: "d1", device: "dev-nohost"),
    };
    // Only "dev-nohost" exists, and its config has no HostAddress property.
    Device[] devices = { NewDevice("dev-nohost", "d1", "{\"Port\":502}") };

    var composition = AddressSpaceComposer.Compose(
        Array.Empty<UnsArea>(), Array.Empty<UnsLine>(), equipment,
        Array.Empty<DriverInstance>(), Array.Empty<ScriptedAlarm>(), devices: devices);
    var nodes = composition.EquipmentNodes;

    var withoutDeviceRow = nodes.Single(n => n.EquipmentId == "eq-missing");
    withoutDeviceRow.DeviceId.ShouldBe("dev-missing");
    withoutDeviceRow.DeviceHost.ShouldBeNull();

    var withoutHostAddress = nodes.Single(n => n.EquipmentId == "eq-nohost");
    withoutHostAddress.DeviceId.ShouldBe("dev-nohost");
    withoutHostAddress.DeviceHost.ShouldBeNull();
}
/// <summary>The shared host extractor must return the trimmed, lower-cased host for a usable
/// <c>HostAddress</c> and null for every malformed shape (blank / non-object / no string HostAddress /
/// blank value / non-JSON).</summary>
[Theory]
[InlineData("{\"HostAddress\":\"10.201.31.5:8193\"}", "10.201.31.5:8193")]
[InlineData("{\"HostAddress\":\" HOST-A:8193 \"}", "host-a:8193")] // trimmed + lower-cased
[InlineData("{\"HostAddress\":\"\"}", null)] // blank value
[InlineData("{\"HostAddress\":1234}", null)] // non-string
[InlineData("{\"Port\":502}", null)] // absent
[InlineData("[]", null)] // non-object root
[InlineData("not json", null)] // malformed
[InlineData("", null)] // blank
public void TryExtractDeviceHost_normalizes_and_tolerates(string? deviceConfig, string? expected)
    => AddressSpaceComposer.TryExtractDeviceHost(deviceConfig).ShouldBe(expected);
/// <summary>The shared normalizer (the single source of truth the FixedTree-partition path reuses on a
/// driver-discovered device-host folder segment) must trim and lower-case its input and leave an
/// already-normalized value unchanged — so a segment like <c>" HOST-A:8193 "</c> matches a stored
/// <c>"host-a:8193"</c> DeviceHost.</summary>
[Theory]
[InlineData("10.201.31.5:8193", "10.201.31.5:8193")]
[InlineData(" HOST-A:8193 ", "host-a:8193")]
[InlineData("host-a:8193", "host-a:8193")] // idempotent
[InlineData("H1", "h1")]
public void NormalizeDeviceHost_trims_and_lowercases(string raw, string expected)
    => AddressSpaceComposer.NormalizeDeviceHost(raw).ShouldBe(expected);
/// <summary>
/// Builds an <see cref="Equipment"/> on line "line-1" whose id doubles as its name, with the
/// upper-cased id as machine code and the given (optional) driver and device bindings.
/// </summary>
private static Equipment NewEquipment(string id, string? driver, string? device)
{
    return new Equipment
    {
        EquipmentId = id,
        DriverInstanceId = driver,
        DeviceId = device,
        UnsLineId = "line-1",
        Name = id,
        MachineCode = id.ToUpperInvariant(),
    };
}
/// <summary>
/// Builds a <see cref="Device"/> owned by the given driver instance, using the device id as its name
/// and <paramref name="deviceConfig"/> as its raw config JSON.
/// </summary>
private static Device NewDevice(string deviceId, string driverInstanceId, string deviceConfig)
{
    return new Device
    {
        DeviceId = deviceId,
        DriverInstanceId = driverInstanceId,
        Name = deviceId,
        DeviceConfig = deviceConfig,
    };
}
}
@@ -30,8 +30,9 @@ public sealed class DeferredAddressSpaceSinkTests
deferred.WriteValue("x", 42, OpcUaQuality.Good, DateTime.UtcNow);
deferred.WriteAlarmCondition("a-1", Snapshot(active: true), DateTime.UtcNow);
deferred.RebuildAddressSpace();
deferred.RaiseNodesAddedModelChange("eq-1");
inner.Calls.ShouldBe(new[] { "WV:x", "WA:a-1", "RB" });
inner.Calls.ShouldBe(new[] { "WV:x", "WA:a-1", "RB", "NA:eq-1" });
}
/// <summary>Verifies that setting sink to null reverts to null sink.</summary>
@@ -212,6 +213,8 @@ public sealed class DeferredAddressSpaceSinkTests
}
/// <inheritdoc />
public void RebuildAddressSpace() => CallQueue.Enqueue("RB");
/// <inheritdoc />
public void RaiseNodesAddedModelChange(string affectedNodeId) => CallQueue.Enqueue($"NA:{affectedNodeId}");
}
private sealed class SurgicalRecordingSink : IOpcUaAddressSpaceSink, ISurgicalAddressSpaceSink
@@ -249,5 +252,7 @@ public sealed class DeferredAddressSpaceSinkTests
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <inheritdoc />
public void RebuildAddressSpace() { }
/// <inheritdoc />
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
}
@@ -0,0 +1,145 @@
using Opc.Ua;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
/// <summary>
/// FixedTree injection — the BEHAVIOURAL half of the runtime node-add model-change announcement. When a
/// driver discovers FixedTree nodes AFTER the server is up and they are materialised into the served
/// Equipment address space, already-connected OPC UA clients won't see them unless the server emits a
/// Part 3 <c>GeneralModelChangeEvent</c> (verb <c>NodeAdded</c>) under the affected parent so subscribed
/// clients refresh their browse. <see cref="OtOpcUaNodeManager.RaiseNodesAddedModelChange"/> is that seam
/// (Tasks 4/5 call it after materialising discovered nodes); this test asserts:
/// <list type="bullet">
/// <item>the built event announces the affected parent with verb <c>NodeAdded</c> (the runtime-add
/// counterpart of <see cref="NodeManagerSurgicalShapeUpdateTests"/>'s <c>DataTypeChanged</c> case);</item>
/// <item>raising it is tolerant — callable before AND after nodes exist, and never throws even when the
/// event path is disabled / has no monitored items.</item>
/// </list>
/// <para>
/// Coverage boundary (deliberate, mirrors <see cref="NodeManagerSurgicalShapeUpdateTests"/>): the
/// model-change event is asserted via its <i>builder</i>
/// (<see cref="OtOpcUaNodeManager.BuildNodesAddedModelChange"/>) in isolation, not its end-to-end
/// <c>Server.ReportEvent</c> dispatch — observing that would require a subscribed event monitored-item.
/// The single in-lock report call-site is covered by inspection (it mirrors the shape-changed reporter).
/// </para>
/// </summary>
public sealed class NodeManagerModelChangeOnAddTests : IDisposable
{
    private static CancellationToken Ct => TestContext.Current.CancellationToken;

    // GUID-suffixed temp directory used as this instance's PKI store root; removed again in Dispose.
    private readonly string _pkiRoot = Path.Combine(
        Path.GetTempPath(),
        $"otopcua-modelchange-add-{Guid.NewGuid():N}");

    /// <summary>The built model-change event announces the affected parent with verb NodeAdded and the parent's
    /// TypeDefinition as AffectedType — what model-aware clients consume to re-browse the new children.</summary>
    [Trait("Category", "Unit")]
    [Fact]
    public async Task Built_nodes_added_event_announces_the_affected_parent_with_NodeAdded_verb()
    {
        var (host, server) = await BootAsync();
        try
        {
            var nm = server.NodeManager!;

            nm.EnsureFolder("eq-7", parentNodeId: null, displayName: "Equipment 7");
            nm.EnsureVariable("eq-7/speed", parentFolderNodeId: "eq-7", displayName: "Speed", dataType: "Float", writable: false);
            var parent = nm.TryGetFolder("eq-7")!;

            var e = nm.BuildNodesAddedModelChange("eq-7");

            e.ShouldNotBeNull();
            e.Changes.ShouldNotBeNull();
            var changes = e.Changes.Value;
            changes.Length.ShouldBe(1);
            changes[0].Affected.ShouldBe(parent.NodeId);
            changes[0].AffectedType.ShouldBe(ObjectTypeIds.FolderType);
            changes[0].Verb.ShouldBe((byte)ModelChangeStructureVerbMask.NodeAdded);
        }
        finally
        {
            // Dispose on the failure path too: a throwing assertion must not leave the started host behind.
            await host.DisposeAsync();
        }
    }

    /// <summary>For an affected id that is not (yet) materialised, the built event still announces NodeAdded but
    /// its AffectedType falls back to <see cref="NodeId.Null"/> (a valid Part 3 "type not applicable") — the
    /// documented fallback of <see cref="OtOpcUaNodeManager.BuildNodesAddedModelChange"/>, locked in as an
    /// invariant.</summary>
    [Trait("Category", "Unit")]
    [Fact]
    public async Task Built_event_for_unknown_id_falls_back_to_null_AffectedType()
    {
        var (host, server) = await BootAsync();
        try
        {
            var nm = server.NodeManager!;

            // No EnsureFolder/EnsureVariable for this id — it is not in the node maps.
            var e = nm.BuildNodesAddedModelChange("eq-unknown");

            e.ShouldNotBeNull();
            e.Changes.ShouldNotBeNull();
            var changes = e.Changes.Value;
            changes.Length.ShouldBe(1);
            changes[0].Verb.ShouldBe((byte)ModelChangeStructureVerbMask.NodeAdded);
            changes[0].AffectedType.ShouldBe(NodeId.Null);
        }
        finally
        {
            // Dispose on the failure path too: a throwing assertion must not leave the started host behind.
            await host.DisposeAsync();
        }
    }

    /// <summary>Raising the announcement is tolerant: callable before any nodes exist (unknown affected id ⇒
    /// AffectedType defaults to null, still a valid Part 3 change) AND after they are materialised, and never
    /// throws even when the event path reaches no monitored items (same tolerance as the write-revert path).</summary>
    [Trait("Category", "Unit")]
    [Fact]
    public async Task Raising_nodes_added_is_tolerant_before_and_after_nodes_exist()
    {
        var (host, server) = await BootAsync();
        try
        {
            var nm = server.NodeManager!;

            // Before any nodes exist under the parent — must not throw.
            Should.NotThrow(() => nm.RaiseNodesAddedModelChange("eq-9"));

            nm.EnsureFolder("eq-9", parentNodeId: null, displayName: "Equipment 9");
            nm.EnsureVariable("eq-9/temp", parentFolderNodeId: "eq-9", displayName: "Temp", dataType: "Float", writable: false);

            // After the nodes are materialised — still must not throw.
            Should.NotThrow(() => nm.RaiseNodesAddedModelChange("eq-9"));
        }
        finally
        {
            // Dispose on the failure path too: a throwing assertion must not leave the started host behind.
            await host.DisposeAsync();
        }
    }

    /// <summary>Creates an <see cref="OpcUaApplicationHost"/> with a unique application URI, a freshly allocated
    /// loopback port and this instance's PKI root, starts it with a new <see cref="OtOpcUaSdkServer"/>, and
    /// returns both. The caller owns the host and must dispose it.</summary>
    private async Task<(OpcUaApplicationHost Host, OtOpcUaSdkServer Server)> BootAsync()
    {
        var host = new OpcUaApplicationHost(
            new OpcUaApplicationHostOptions
            {
                ApplicationName = "OtOpcUa.ModelChangeOnAddTest",
                ApplicationUri = $"urn:OtOpcUa.ModelChangeOnAddTest:{Guid.NewGuid():N}",
                OpcUaPort = AllocateFreePort(),
                PublicHostname = "localhost",
                PkiStoreRoot = _pkiRoot,
            },
            Microsoft.Extensions.Logging.Abstractions.NullLogger<OpcUaApplicationHost>.Instance);
        var server = new OtOpcUaSdkServer();
        await host.StartAsync(server, Ct);
        return (host, server);
    }

    /// <summary>Asks the OS for an ephemeral loopback TCP port by binding a listener to port 0 and reading back
    /// the assigned port. The listener is stopped before returning, so the port is free again (and could in
    /// principle be taken by someone else) by the time the caller binds it.</summary>
    private static int AllocateFreePort()
    {
        using var listener = new System.Net.Sockets.TcpListener(System.Net.IPAddress.Loopback, 0);
        listener.Start();
        var port = ((System.Net.IPEndPoint)listener.LocalEndpoint).Port;
        listener.Stop();
        return port;
    }

    /// <summary>Cleans up the PKI root directory.</summary>
    public void Dispose()
    {
        if (Directory.Exists(_pkiRoot))
        {
            try { Directory.Delete(_pkiRoot, recursive: true); }
            catch { /* best-effort cleanup */ }
        }
    }
}
@@ -0,0 +1,44 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
[Trait("Category", "Unit")]
public sealed class CapturingAddressSpaceBuilderTests
{
    /// <summary>A variable declared three folders deep is captured exactly once, carrying its folder path,
    /// browse name, full reference and data type, and is flagged read-only for a ViewOnly attribute.</summary>
    [Fact]
    public void Records_nested_path_segments_full_reference_and_metadata()
    {
        var builder = new CapturingAddressSpaceBuilder();
        var seriesNumberInfo = new DriverAttributeInfo(
            FullName: "10.0.0.5:8193/Identity/SeriesNumber",
            DriverDataType: DriverDataType.String,
            IsArray: false,
            ArrayDim: null,
            SecurityClass: SecurityClassification.ViewOnly,
            IsHistorized: false);

        // FOCAS -> device host -> Identity -> SeriesNumber
        builder
            .Folder("FOCAS", "FOCAS")
            .Folder("10.0.0.5:8193", "cnc")
            .Folder("Identity", "Identity")
            .Variable("SeriesNumber", "SeriesNumber", seriesNumberInfo);

        builder.Nodes.Count.ShouldBe(1);
        var captured = builder.Nodes[0];
        string[] expectedPath = { "FOCAS", "10.0.0.5:8193", "Identity" };
        captured.FolderPathSegments.ShouldBe(expectedPath);
        captured.BrowseName.ShouldBe("SeriesNumber");
        captured.FullReference.ShouldBe("10.0.0.5:8193/Identity/SeriesNumber");
        captured.DataType.ShouldBe(DriverDataType.String);
        captured.Writable.ShouldBeFalse(); // ViewOnly -> read-only
    }

    /// <summary>Adding a property does not throw and records no node; marking a variable as an alarm condition
    /// returns a non-null sink, and only the variable itself is captured.</summary>
    [Fact]
    public void AddProperty_is_ignored_and_alarm_marking_is_a_noop_sink()
    {
        var builder = new CapturingAddressSpaceBuilder();
        var folder = builder.Folder("FOCAS", "FOCAS");

        folder.AddProperty("Manufacturer", DriverDataType.String, "FANUC"); // ignored, no throw

        var alarmAttribute = new DriverAttributeInfo(
            FullName: "ref",
            DriverDataType: DriverDataType.Int32,
            IsArray: false,
            ArrayDim: null,
            SecurityClass: SecurityClassification.ViewOnly,
            IsHistorized: false,
            IsAlarm: true);
        var handle = folder.Variable("V", "V", alarmAttribute);
        var alarmSink = handle.MarkAsAlarmCondition(new AlarmConditionInfo("src", AlarmSeverity.Low, null));

        alarmSink.ShouldNotBeNull(); // no-op sink, alarms out of scope
        builder.Nodes.Count.ShouldBe(1);
    }
}
@@ -0,0 +1,130 @@
using System.Linq;
using System.Text.Json;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
/// <summary>
/// Proves follow-up E: the equipment's <c>DriverInstanceId</c> / <c>DeviceId</c> bindings and the
/// resolved <c>DeviceHost</c> (parsed from the bound <see cref="Device"/>'s schemaless
/// <c>DeviceConfig</c> JSON) round-trip with byte-parity through both <see cref="EquipmentNode"/>
/// producers: the live-edit composer (<see cref="AddressSpaceComposer.Compose(System.Collections.Generic.IReadOnlyList{UnsArea},System.Collections.Generic.IReadOnlyList{UnsLine},System.Collections.Generic.IReadOnlyList{Equipment},System.Collections.Generic.IReadOnlyList{DriverInstance},System.Collections.Generic.IReadOnlyList{ScriptedAlarm},System.Collections.Generic.IReadOnlyList{Device})"/>)
/// and the artifact decoder (<see cref="DeploymentArtifact.ParseComposition(System.ReadOnlySpan{byte})"/>).
/// A secondary/follower node decoding a serialized artifact MUST see the same DeviceHost as the
/// primary so it grafts FixedTree / partitions multi-device drivers identically. Both sides resolve
/// the host through the shared <see cref="AddressSpaceComposer.TryExtractDeviceHost"/> (single source
/// of truth + identical trim + lower-case normalization).
/// </summary>
[Trait("Category", "Unit")]
public sealed class DeploymentArtifactDeviceHostParityTests
{
    /// <summary>
    /// One draft exercising every branch: driver + device + host (with a mixed-case/whitespace host
    /// that must normalize identically on both sides); a driver-bound equipment with NO device
    /// (DeviceId null ⇒ DeviceHost null); a driver-less, device-less equipment (all three null); and a
    /// device whose config carries no <c>HostAddress</c> (DeviceId carried, DeviceHost null). The decoded
    /// <c>EquipmentNodes</c> must equal the composer's element-wise (positional-record value equality)
    /// and in the same order.
    /// </summary>
    [Fact]
    public void Composer_and_artifact_agree_on_equipment_node_device_host()
    {
        // eq-1: driver d1 + device dev1 (host needs trim + lower-case)
        var eq1 = NewEquipment("eq-1", driver: "d1", device: "dev1");
        // eq-2: driver d1, NO device → DeviceHost null
        var eq2 = NewEquipment("eq-2", driver: "d1", device: null);
        // eq-3: driver-less + device-less → all three null
        var eq3 = NewEquipment("eq-3", driver: null, device: null);
        // eq-4: device dev-nohost whose config has no HostAddress → DeviceHost null
        var eq4 = NewEquipment("eq-4", driver: "d1", device: "dev-nohost");

        var dev1 = NewDevice("dev1", "d1", "{\"HostAddress\":\"  HOST-A:8193  \"}"); // → host-a:8193
        var devNoHost = NewDevice("dev-nohost", "d1", "{\"Port\":502}");              // → null

        var equipment = new[] { eq1, eq2, eq3, eq4 };
        var devices = new[] { dev1, devNoHost };

        // ---- Side 1: the live-edit composer ----
        var composed = AddressSpaceComposer.Compose(
            Array.Empty<UnsArea>(), Array.Empty<UnsLine>(), equipment,
            Array.Empty<DriverInstance>(), Array.Empty<ScriptedAlarm>(), devices: devices);

        // ---- Side 2: serialise the SAME draft to the artifact blob shape, then decode it ----
        var blob = JsonSerializer.SerializeToUtf8Bytes(new
        {
            Equipment = equipment.Select(ToEquipmentSnapshot).ToArray(),
            Devices = devices.Select(ToDeviceSnapshot).ToArray(),
        });
        var decoded = DeploymentArtifact.ParseComposition(blob);

        // ---- Full byte-parity: every field, same order (positional-record value equality) ----
        decoded.EquipmentNodes.Count.ShouldBe(4);
        decoded.EquipmentNodes.SequenceEqual(composed.EquipmentNodes).ShouldBeTrue();

        // Spell out per-equipment so a divergence names the offending node.
        var eq1Node = decoded.EquipmentNodes.Single(e => e.EquipmentId == "eq-1");
        eq1Node.DriverInstanceId.ShouldBe("d1");
        eq1Node.DeviceId.ShouldBe("dev1");
        eq1Node.DeviceHost.ShouldBe("host-a:8193"); // trimmed + lower-cased on both sides

        var eq2Node = decoded.EquipmentNodes.Single(e => e.EquipmentId == "eq-2");
        eq2Node.DriverInstanceId.ShouldBe("d1");
        eq2Node.DeviceId.ShouldBeNull();
        eq2Node.DeviceHost.ShouldBeNull();

        var eq3Node = decoded.EquipmentNodes.Single(e => e.EquipmentId == "eq-3");
        eq3Node.DriverInstanceId.ShouldBeNull();
        eq3Node.DeviceId.ShouldBeNull();
        eq3Node.DeviceHost.ShouldBeNull();

        var eq4Node = decoded.EquipmentNodes.Single(e => e.EquipmentId == "eq-4");
        eq4Node.DriverInstanceId.ShouldBe("d1");
        eq4Node.DeviceId.ShouldBe("dev-nohost");
        eq4Node.DeviceHost.ShouldBeNull();
    }

    /// <summary>Builds an <see cref="Equipment"/> fixture on line <c>line-1</c>; the id doubles as the name
    /// and, upper-cased, as the machine code. Driver / device bindings may be null.</summary>
    private static Equipment NewEquipment(string id, string? driver, string? device) => new()
    {
        EquipmentId = id,
        DriverInstanceId = driver,
        DeviceId = device,
        UnsLineId = "line-1",
        Name = id,
        MachineCode = id.ToUpperInvariant(),
    };

    /// <summary>Builds a <see cref="Device"/> fixture; the device id doubles as its name and
    /// <paramref name="deviceConfig"/> is stored verbatim as the raw config JSON text.</summary>
    private static Device NewDevice(string deviceId, string driverInstanceId, string deviceConfig) => new()
    {
        DeviceId = deviceId,
        DriverInstanceId = driverInstanceId,
        Name = deviceId,
        DeviceConfig = deviceConfig,
    };

    /// <summary>The Pascal-case snapshot an <see cref="Equipment"/> EF entity serialises to in the
    /// artifact (matches ConfigComposer) — including the nullable <c>DriverInstanceId</c> / <c>DeviceId</c>
    /// the equipment-node decoder re-reads.</summary>
    private static object ToEquipmentSnapshot(Equipment e) => new
    {
        e.EquipmentId,
        e.Name,
        e.MachineCode,
        e.UnsLineId,
        e.DriverInstanceId,
        e.DeviceId,
    };

    /// <summary>The Pascal-case snapshot a <see cref="Device"/> EF entity serialises to in the artifact —
    /// the decoder re-reads <c>DeviceId</c> + the raw <c>DeviceConfig</c> blob the host rides inside.</summary>
    private static object ToDeviceSnapshot(Device d) => new
    {
        d.DeviceId,
        d.DriverInstanceId,
        d.Name,
        d.DeviceConfig,
    };
}
@@ -0,0 +1,119 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
[Trait("Category", "Unit")]
public sealed class DiscoveredNodeMapperTests
{
    /// <summary>Shorthand for a <see cref="DiscoveredNode"/> whose display name equals its browse name.
    /// NOTE(review): the positional arguments presumably follow the named-argument order used in
    /// <see cref="Array_metadata_passes_through_unchanged"/> (…, IsArray, ArrayDim, Writable, IsHistorized).</summary>
    private static DiscoveredNode Node(string[] path, string name, string fullRef,
        DriverDataType dt = DriverDataType.Float64, bool writable = false)
        => new(path, name, name, fullRef, dt, false, null, writable, false);

    /// <summary>With a single device-host folder in the input, that segment is dropped from the mapped NodeIds;
    /// folder parents, the routing entry and the read-only flag are asserted on the result.</summary>
    [Fact]
    public void Maps_under_equipment_collapsing_single_device_folder()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
            Node(["FOCAS", "10.0.0.5:8193", "Axes", "X"], "AbsolutePosition", "10.0.0.5:8193/Axes/X/AbsolutePosition"),
        };

        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string>());

        result.Variables.Select(v => v.NodeId).ShouldBe(new[]
        {
            "EQ-1/FOCAS/Identity/SeriesNumber",
            "EQ-1/FOCAS/Axes/X/AbsolutePosition",
        }, ignoreOrder: true);
        result.Folders.Select(f => f.NodeId).ShouldContain("EQ-1/FOCAS/Axes/X");
        result.Folders.First(f => f.NodeId == "EQ-1/FOCAS/Axes/X").ParentNodeId.ShouldBe("EQ-1/FOCAS/Axes");
        result.RoutingByRef["10.0.0.5:8193/Identity/SeriesNumber"].ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
        // NodeIds are identifiers, not linguistic text: compare ordinally rather than with the current culture.
        result.Variables.First(v => v.NodeId.EndsWith("SeriesNumber", StringComparison.Ordinal)).Writable.ShouldBeFalse();
    }

    /// <summary>A discovered node whose full reference is already in <c>authoredRefs</c> is left out of the plan.</summary>
    [Fact]
    public void Dedups_authored_refs()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193"], "parts-count", "parts-count"),
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
        };

        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string> { "parts-count" });

        result.Variables.ShouldHaveSingleItem();
        result.Variables[0].NodeId.ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
    }

    /// <summary>With two distinct device-host folders in the input, the device segment is kept in the NodeIds.</summary>
    [Fact]
    public void Does_not_collapse_when_two_devices_present()
    {
        var nodes = new[]
        {
            Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "a", DriverDataType.String),
            Node(["FOCAS", "10.0.0.6:8193", "Identity"], "SeriesNumber", "b", DriverDataType.String),
        };

        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string>());

        result.Variables.Select(v => v.NodeId).ShouldBe(new[]
        {
            "EQ-1/FOCAS/10.0.0.5:8193/Identity/SeriesNumber",
            "EQ-1/FOCAS/10.0.0.6:8193/Identity/SeriesNumber",
        }, ignoreOrder: true);
    }

    /// <summary>No discovered nodes in ⇒ no folders, variables or routing entries out.</summary>
    [Fact]
    public void Empty_input_yields_empty_plan()
    {
        var result = DiscoveredNodeMapper.Map("EQ-1", Array.Empty<DiscoveredNode>(), authoredRefs: new HashSet<string>());

        result.Folders.ShouldBeEmpty();
        result.Variables.ShouldBeEmpty();
        result.RoutingByRef.ShouldBeEmpty();
    }

    /// <summary>The array flag and dimension of a discovered node surface unchanged on the mapped variable
    /// (<c>IsArray</c> / <c>ArrayLength</c>).</summary>
    [Fact]
    public void Array_metadata_passes_through_unchanged()
    {
        var node = new DiscoveredNode(
            FolderPathSegments: ["FOCAS", "10.0.0.5:8193", "Axes"],
            BrowseName: "Positions",
            DisplayName: "Positions",
            FullReference: "10.0.0.5:8193/Axes/Positions",
            DataType: DriverDataType.Float64,
            IsArray: true,
            ArrayDim: 8u,
            Writable: false,
            IsHistorized: false);

        var result = DiscoveredNodeMapper.Map("EQ-1", new[] { node }, authoredRefs: new HashSet<string>());

        result.Variables.ShouldHaveSingleItem();
        result.Variables[0].IsArray.ShouldBeTrue();
        result.Variables[0].ArrayLength.ShouldBe(8u);
    }

    /// <summary>Each driver data type maps to the expected data-type string on the mapped variable.</summary>
    [Theory]
    // Mirror OtOpcUaNodeManager.ResolveBuiltInDataType's accepted string set: Float32 -> "Float",
    // Float64 -> "Double", Reference (Galaxy attr ref encoded as a string) -> "String". The pass-through
    // members must keep their enum name so the node manager resolves them to the matching built-in type.
    [InlineData(DriverDataType.Float64, "Double")]
    [InlineData(DriverDataType.Float32, "Float")]
    [InlineData(DriverDataType.Reference, "String")]
    [InlineData(DriverDataType.Boolean, "Boolean")]
    [InlineData(DriverDataType.Int16, "Int16")]
    [InlineData(DriverDataType.Int32, "Int32")]
    [InlineData(DriverDataType.Int64, "Int64")]
    [InlineData(DriverDataType.UInt16, "UInt16")]
    [InlineData(DriverDataType.UInt32, "UInt32")]
    [InlineData(DriverDataType.UInt64, "UInt64")]
    [InlineData(DriverDataType.String, "String")]
    [InlineData(DriverDataType.DateTime, "DateTime")]
    public void DataType_maps_to_node_manager_builtin_string(DriverDataType dt, string expected)
    {
        var nodes = new[] { Node(["FOCAS", "10.0.0.5:8193", "Identity"], "Value", "10.0.0.5:8193/Identity/Value", dt) };

        var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string>());

        result.Variables.ShouldHaveSingleItem();
        result.Variables[0].DataType.ShouldBe(expected);
    }
}
@@ -0,0 +1,350 @@
using System.Collections.Concurrent;
using System.Text.Json;
using Akka.Actor;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging.Abstractions;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using ZB.MOM.WW.OtOpcUa.Runtime.OpcUa;
using ZB.MOM.WW.OtOpcUa.Runtime.Tests.Harness;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
/// <summary>
/// Task 9 — the focused END-TO-END proof that a driver-discovered FixedTree node is grafted into the
/// served Equipment OPC UA address space and a polled value reaches it. Unlike the Task-7/8 suites
/// (which wire the OPC UA publish side as a <see cref="Akka.TestKit.TestProbe"/> and assert on the
/// intercepted <see cref="OpcUaPublishActor.MaterialiseDiscoveredNodes"/> /
/// <see cref="OpcUaPublishActor.AttributeValueUpdate"/> messages), this suite wires the FULL real chain:
///
/// <list type="bullet">
/// <item>a real <see cref="DriverHostActor"/> (resolves equipment, maps via
/// <see cref="DiscoveredNodeMapper"/>, extends the live-value routing map, caches the plan);</item>
/// <item>a real <see cref="OpcUaPublishActor"/> as its <c>opcUaPublishActor</c> seam (so
/// <c>MaterialiseDiscoveredNodes</c> + <c>AttributeValueUpdate</c> are actually handled, not
/// intercepted);</item>
/// <item>a real <see cref="AddressSpaceApplier"/> over a recording
/// <see cref="IOpcUaAddressSpaceSink"/> (so the materialise + value-write reach the sink).</item>
/// </list>
///
/// <para>
/// The assertions are therefore made on the SINK's recorded <c>EnsureVariable</c> /
/// <c>RaiseNodesAddedModelChange</c> / <c>WriteValue</c> calls — i.e. the discovered node was
/// materialised through the real applier AND a published value surfaces <see cref="OpcUaQuality.Good"/>
/// at the mapped NodeId (in production this overwrites the <c>BadWaitingForInitialData</c> seed that
/// <c>OtOpcUaNodeManager.EnsureVariable</c> stamps on a freshly-materialised variable; the recording
/// sink does not model that seed, so the faithful assertion available here is that the live value
/// lands Good at the same NodeId the materialise created).
/// </para>
///
/// <para>
/// <b>Seam choices (faithful to the sibling suites).</b> Discovery is driven by Telling the host
/// <see cref="DriverInstanceActor.DiscoveredNodesReady"/> directly, and the polled value by Telling
/// <see cref="DriverInstanceActor.AttributeValuePublished"/> directly — exactly the seams the Task-7/8
/// tests use (there is no test seam to drive a real <see cref="ITagDiscovery"/> poll loop through a
/// child to Connected, and the spawned child is a <see cref="SubscribableStubDriver"/>). The publish
/// actor is wired WITHOUT a dbFactory, so the host's apply-time <see cref="OpcUaPublishActor.RebuildAddressSpace"/>
/// falls back to a raw <c>sink.RebuildAddressSpace()</c> (no <c>EnsureVariable</c>); this keeps the
/// ONLY <c>EnsureVariable</c> traffic on the sink the discovered-node materialise itself, so the
/// mapped NodeId is unambiguous. The discovery-injection chain (mapper → applier → sink + routing
/// map) is fully real.
/// </para>
/// </summary>
[Trait("Category", "Unit")]
public sealed class DiscoveryInjectionEndToEndTests : RuntimeActorTestBase
{
// Node identity handed to DriverHostActor.Props when the host under test is spawned.
private static readonly NodeId TestNode = NodeId.Parse("disc-e2e-node");
// Revision hash of the first seeded deployment (64 'a' characters).
private static readonly RevisionHash RevA = RevisionHash.Parse(new string('a', 64));
// Revision hash of the second deployment used by the redeploy test (64 'b' characters).
private static readonly RevisionHash RevB = RevisionHash.Parse(new string('b', 64));
// Wait bound passed to the AwaitAssert / ExpectMsg calls in this suite.
private static readonly TimeSpan Timeout = TimeSpan.FromSeconds(5);
// Fixed UTC source timestamp carried by published values so assertions can match it exactly.
private static readonly DateTime Ts = new(2026, 6, 26, 10, 0, 0, DateTimeKind.Utc);
// The FixedTree node the driver "discovers": FOCAS/<deviceHost>/Identity/SeriesNumber, a String value,
// whose FullReference differs from any authored tag so the mapper keeps it (does not shadow an authored
// node). The single device-host folder collapses, so it materialises at EQ-1/FOCAS/Identity/SeriesNumber.
private const string FixedTreeRef = "10.0.0.5:8193/Identity/SeriesNumber";
private const string FixedTreeDisplayName = "SeriesNumber";
// The DETERMINISTIC NodeId the chain must place the FixedTree node at: EQ-1 (the bound equipment root) +
// the COLLAPSED folder path. The mapper's device-folder collapse drops the single shared device-host
// segment ("10.0.0.5:8193"), so FolderPathSegments ["FOCAS","10.0.0.5:8193","Identity"] + browse
// "SeriesNumber" → "EQ-1/FOCAS/Identity/SeriesNumber" (per EquipmentNodeIds.Variable). Asserting this
// EXACT NodeId closes the loop on the collapse rule — a prefix/StartsWith check would still pass if the
// collapse broke (e.g. "EQ-1/FOCAS/10.0.0.5:8193/Identity/SeriesNumber").
private const string ExpectedFixedTreeNodeId = "EQ-1/FOCAS/Identity/SeriesNumber";
/// <summary>Returns a fresh one-element array holding the read-only, scalar String node
/// <c>FOCAS/10.0.0.5:8193/Identity/SeriesNumber</c> that the tests report as "discovered".</summary>
private static DiscoveredNode[] FixedTreeNodes()
{
    string[] folderPath = { "FOCAS", "10.0.0.5:8193", "Identity" };

    var seriesNumber = new DiscoveredNode(
        FolderPathSegments: folderPath,
        BrowseName: "SeriesNumber",
        DisplayName: FixedTreeDisplayName,
        FullReference: FixedTreeRef,
        DataType: DriverDataType.String,
        IsArray: false,
        ArrayDim: null,
        Writable: false,
        IsHistorized: false);

    return new[] { seriesNumber };
}
/// <summary>
/// End-to-end #1: the discovered FixedTree node appears at the equipment AND a polled value flows
/// Good. Drives the real host (deployment applied, real child spawned, discovery reported) wired to a
/// real publish actor + real applier + recording sink, then asserts:
/// (a) the sink recorded an <c>EnsureVariable</c> for the FixedTree node under EQ-1 (materialised
/// through the REAL applier — the node now exists in the served address space), with a
/// <c>RaiseNodesAddedModelChange</c> under EQ-1 so connected clients refresh;
/// (b) after an <see cref="DriverInstanceActor.AttributeValuePublished"/> for the FixedTree ref, the
/// sink recorded a <c>WriteValue</c> at THAT SAME NodeId carrying the value with
/// <see cref="OpcUaQuality.Good"/> — proving the live value routed end-to-end and (in production)
/// overwrote the BadWaitingForInitialData seed.
/// </summary>
[Fact]
public void Discovered_node_materialises_at_equipment_and_polled_value_flows_Good()
{
    // Arrange: one authored tag binds driver d1 to equipment EQ-1; the real chain is up and Applied.
    var authoredTag = (Equip: "EQ-1", Driver: "d1", FullName: "40001", Folder: (string?)null, Name: "speed");
    var dbFactory = NewInMemoryDbFactory();
    var seededId = SeedDeploymentWithEquipmentTags(dbFactory, RevA, authoredTag);
    var chain = SpawnHostWithRealPublishActor(dbFactory, seededId);
    var host = chain.Host;
    var sink = chain.Sink;

    // Act 1: the driver reports its captured FixedTree (same seam the sibling suites use).
    host.Tell(new DriverInstanceActor.DiscoveredNodesReady("d1", FixedTreeNodes()));

    // (a) The variable reached the sink through the real applier, at the exact collapsed NodeId under
    //     the bound equipment root, and a node-added model change was raised for that equipment.
    AwaitAssert(() =>
    {
        var materialised = sink.Variables.SingleOrDefault(x => x.DisplayName == FixedTreeDisplayName);
        materialised.NodeId.ShouldBe(ExpectedFixedTreeNodeId);
        materialised.DataType.ShouldBe("String");
        materialised.Writable.ShouldBeFalse();
        sink.ModelChanges.ShouldContain("EQ-1");
    }, duration: Timeout);

    // Act 2: publish a Good value for the FixedTree reference.
    var published = new DriverInstanceActor.AttributeValuePublished("d1", FixedTreeRef, "SN-12345", OpcUaQuality.Good, Ts);
    host.Tell(published);

    // (b) The value was written at that same NodeId with the published value, quality and timestamp.
    AwaitAssert(() =>
    {
        var landed = sink.Values.SingleOrDefault(x => x.NodeId == ExpectedFixedTreeNodeId);
        landed.NodeId.ShouldBe(ExpectedFixedTreeNodeId);
        landed.Value.ShouldBe("SN-12345");
        landed.Quality.ShouldBe(OpcUaQuality.Good);
        landed.Ts.ShouldBe(Ts);
    }, duration: Timeout);
}
/// <summary>
/// End-to-end #2 (Task 8 survival): the injected FixedTree node + its live-value route SURVIVE a
/// redeploy. After the first injection materialises + a value flows Good, a SECOND deployment (new
/// revision, same d1 → EQ-1 binding) re-runs <c>PushDesiredSubscriptions</c> — which clears the
/// routing maps and re-pushes an authored-only subscription set; the Task-8 tail re-apply re-grafts
/// the cached discovered plan. Asserts the sink records the FixedTree <c>EnsureVariable</c> AGAIN
/// (re-materialised after the rebuild) at the same NodeId, and a subsequent published value STILL
/// <c>WriteValue</c>s Good there (the routing map was rebuilt, not left empty by the Clear()).
/// </summary>
[Fact]
public void Discovered_node_and_value_survive_a_redeploy()
{
    var dbFactory = NewInMemoryDbFactory();
    var firstDeployment = SeedDeploymentWithEquipmentTags(dbFactory, RevA,
        (Equip: "EQ-1", Driver: "d1", FullName: "40001", Folder: (string?)null, Name: "speed"));
    var (host, sink, coordinator) = SpawnHostWithRealPublishActor(dbFactory, firstDeployment);

    host.Tell(new DriverInstanceActor.DiscoveredNodesReady("d1", FixedTreeNodes()));

    // Baseline 1: the FixedTree node is materialised at the exact collapsed NodeId under EQ-1.
    AwaitAssert(() =>
    {
        sink.Variables.ShouldContain(x => x.NodeId == ExpectedFixedTreeNodeId && x.DisplayName == FixedTreeDisplayName);
    }, duration: Timeout);

    // Baseline 2: a value published before the redeploy lands Good at that NodeId.
    host.Tell(new DriverInstanceActor.AttributeValuePublished("d1", FixedTreeRef, "SN-AAA", OpcUaQuality.Good, Ts));
    AwaitAssert(() =>
    {
        sink.Values.ShouldContain(x => x.NodeId == ExpectedFixedTreeNodeId && Equals(x.Value, "SN-AAA") && x.Quality == OpcUaQuality.Good);
    }, duration: Timeout);

    var materialiseCountBeforeRedeploy = sink.Variables.Count(x => x.NodeId == ExpectedFixedTreeNodeId);

    // Redeploy: a second deployment (new revision, same d1 → EQ-1 binding) is dispatched and acknowledged.
    var secondDeployment = SeedDeploymentWithEquipmentTags(dbFactory, RevB,
        (Equip: "EQ-1", Driver: "d1", FullName: "40001", Folder: (string?)null, Name: "speed"));
    host.Tell(new DispatchDeployment(secondDeployment, RevB, CorrelationId.NewId()));
    var redeployAck = coordinator.ExpectMsg<ApplyAck>(Timeout);
    redeployAck.Outcome.ShouldBe(ApplyAckOutcome.Applied);

    // (a) The sink saw at least one more EnsureVariable for the same NodeId after the redeploy.
    AwaitAssert(() =>
    {
        var materialiseCountNow = sink.Variables.Count(x => x.NodeId == ExpectedFixedTreeNodeId);
        materialiseCountNow.ShouldBeGreaterThan(materialiseCountBeforeRedeploy);
    }, duration: Timeout);

    // (b) A value published after the redeploy still lands Good at the same NodeId.
    var tsAfterRedeploy = Ts.AddSeconds(5);
    host.Tell(new DriverInstanceActor.AttributeValuePublished("d1", FixedTreeRef, "SN-BBB", OpcUaQuality.Good, tsAfterRedeploy));
    AwaitAssert(() =>
    {
        sink.Values.ShouldContain(x => x.NodeId == ExpectedFixedTreeNodeId && Equals(x.Value, "SN-BBB") && x.Quality == OpcUaQuality.Good);
    }, duration: Timeout);
}
/// <summary>Spawns the real chain — recording sink → real <see cref="AddressSpaceApplier"/> → real
/// <see cref="OpcUaPublishActor"/> (the host's <c>opcUaPublishActor</c> seam) → real
/// <see cref="DriverHostActor"/> backed by a <see cref="SubscribableStubDriver"/> — dispatches the
/// deployment, and waits for the Applied ACK so <c>_lastComposition</c> + the live child + the initial
/// subscribe pass have completed before discovery is injected. The publish actor is wired with the
/// applier but NO dbFactory, so its apply-time RebuildAddressSpace is a raw sink rebuild (no EnsureVariable)
/// and the only EnsureVariable traffic is the discovered-node materialise itself.</summary>
private (IActorRef Host, RecordingSink Sink, Akka.TestKit.TestProbe Coordinator) SpawnHostWithRealPublishActor(
    IDbContextFactory<OtOpcUaConfigDbContext> db, DeploymentId deploymentId)
{
    // Probes standing in for the deployment coordinator and the virtual-tag host.
    var coordinator = CreateTestProbe();
    var vtHost = CreateTestProbe();

    // Publish side: recording sink wrapped by a real applier, both given to the publish actor.
    var sink = new RecordingSink();
    var publishProps = OpcUaPublishActor.PropsForTests(
        sink: sink,
        applier: new AddressSpaceApplier(sink, NullLogger<AddressSpaceApplier>.Instance));
    var publish = Sys.ActorOf(publishProps);

    // Driver side: real host actor whose children come from the subscribing stub-driver factory.
    var hostProps = DriverHostActor.Props(
        db, TestNode, coordinator.Ref,
        driverFactory: new SubscribingDriverFactory("Modbus"),
        localRoles: new HashSet<string> { "driver" },
        opcUaPublishActor: publish,
        virtualTagHostOverride: vtHost.Ref);
    var host = Sys.ActorOf(hostProps);

    // Dispatch the seeded deployment and block until the coordinator probe sees the Applied ACK.
    host.Tell(new DispatchDeployment(deploymentId, RevA, CorrelationId.NewId()));
    var ack = coordinator.ExpectMsg<ApplyAck>(Timeout);
    ack.Outcome.ShouldBe(ApplyAckOutcome.Applied);

    return (host, sink, coordinator);
}
/// <summary>Seeds a Sealed deployment whose artifact carries the minimal arrays needed to project
/// equipment tags + a real (non-stubbed) <see cref="DriverInstanceActor"/> child for each driver
/// (mirrors <c>DriverHostActorDiscoveryTests.SeedDeploymentWithEquipmentTags</c>). An authored value tag
/// both sets <c>_lastComposition</c> and binds the driver → equipment (the only way the host resolves the
/// equipment a discovered node grafts under).</summary>
private static DeploymentId SeedDeploymentWithEquipmentTags(
    IDbContextFactory<OtOpcUaConfigDbContext> db, RevisionHash rev,
    params (string Equip, string Driver, string FullName, string? Folder, string Name)[] tags)
{
    // One enabled Modbus DriverInstance row per distinct driver id referenced by the tags.
    var driverRows = tags
        .Select(t => t.Driver)
        .Distinct(StringComparer.Ordinal)
        .Select(d => new
        {
            DriverInstanceRowId = Guid.NewGuid(),
            DriverInstanceId = d,
            Name = d,
            DriverType = "Modbus", // not Windows-only ⇒ a real child is spawned (not stubbed)
            Enabled = true,
            DriverConfig = "{}",
            NamespaceId = "ns-eq",
        })
        .ToArray();

    // One Tag row per input tuple; the driver-side address travels inside the TagConfig JSON.
    var tagRows = tags
        .Select((t, i) => new
        {
            TagId = $"tag-{i}",
            EquipmentId = t.Equip,
            DriverInstanceId = t.Driver,
            Name = t.Name,
            FolderPath = t.Folder,
            DataType = "Double",
            TagConfig = JsonSerializer.Serialize(new { t.FullName }),
        })
        .ToArray();

    var artifact = JsonSerializer.SerializeToUtf8Bytes(new
    {
        Namespaces = new[]
        {
            new { NamespaceId = "ns-eq", Kind = 0 }, // NamespaceKind.Equipment = 0
        },
        DriverInstances = driverRows,
        Tags = tagRows,
    });

    // Persist the artifact as a Sealed deployment row and hand back its id.
    var id = DeploymentId.NewId();
    using (var ctx = db.CreateDbContext())
    {
        var row = new Deployment
        {
            DeploymentId = id.Value,
            RevisionHash = rev.Value,
            Status = DeploymentStatus.Sealed,
            CreatedBy = "test",
            SealedAtUtc = DateTime.UtcNow,
            ArtifactBlob = artifact,
        };
        ctx.Deployments.Add(row);
        ctx.SaveChanges();
    }

    return id;
}
/// <summary>
/// Driver factory that hands out one shared <see cref="SubscribableStubDriver"/> for a single supported
/// driver type and declines every other type. Per the original note this makes the host spawn a real
/// (non-stubbed) <see cref="DriverInstanceActor"/> child so its subscribe path is exercised (mirrors
/// <c>DriverHostActorDiscoveryTests.SubscribingDriverFactory</c>).
/// </summary>
private sealed class SubscribingDriverFactory : IDriverFactory
{
    private readonly SubscribableStubDriver _sharedDriver = new();
    private readonly string _acceptedType;

    /// <summary>Creates a factory that only recognises <paramref name="supportedType"/>.</summary>
    /// <param name="supportedType">The one driver type (ordinal, case-sensitive) this factory serves.</param>
    public SubscribingDriverFactory(string supportedType)
    {
        _acceptedType = supportedType;
    }

    /// <inheritdoc />
    public IReadOnlyCollection<string> SupportedTypes
    {
        get { return new[] { _acceptedType }; }
    }

    /// <inheritdoc />
    public IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson)
    {
        // Ordinal match only; the instance id and config JSON are ignored — every match shares one stub.
        if (!string.Equals(driverType, _acceptedType, StringComparison.Ordinal))
        {
            return null;
        }

        return _sharedDriver;
    }
}
/// <summary>
/// <see cref="IOpcUaAddressSpaceSink"/> test double that records EnsureFolder / EnsureVariable /
/// WriteValue / RaiseNodesAddedModelChange calls (and counts rebuilds) so a test can assert what was
/// materialised and which values were written. Calls are stored in concurrent queues and the rebuild
/// counter is bumped with <see cref="Interlocked"/>; the original note explains why: the publish actor
/// runs on an Akka dispatcher thread while assertions run on the test thread.
/// </summary>
private sealed class RecordingSink : IOpcUaAddressSpaceSink
{
    private readonly ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName)> _folderCalls = new();
    private readonly ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName, string DataType, bool Writable)> _variableCalls = new();
    private readonly ConcurrentQueue<(string NodeId, object? Value, OpcUaQuality Quality, DateTime Ts)> _valueWrites = new();
    private readonly ConcurrentQueue<string> _nodeAddedAnnouncements = new();

    /// <summary>Gets the count of raw RebuildAddressSpace calls (apply-time rebuild fallback).</summary>
    public int RebuildCalls;

    /// <summary>Gets a point-in-time copy of the EnsureFolder calls recorded so far.</summary>
    public List<(string NodeId, string? ParentNodeId, string DisplayName)> Folders
    {
        get { return _folderCalls.ToList(); }
    }

    /// <summary>Gets a point-in-time copy of the EnsureVariable calls recorded so far.</summary>
    public List<(string NodeId, string? ParentNodeId, string DisplayName, string DataType, bool Writable)> Variables
    {
        get { return _variableCalls.ToList(); }
    }

    /// <summary>Gets a point-in-time copy of the WriteValue calls recorded so far.</summary>
    public List<(string NodeId, object? Value, OpcUaQuality Quality, DateTime Ts)> Values
    {
        get { return _valueWrites.ToList(); }
    }

    /// <summary>Gets a point-in-time copy of the RaiseNodesAddedModelChange node ids recorded so far.</summary>
    public List<string> ModelChanges
    {
        get { return _nodeAddedAnnouncements.ToList(); }
    }

    /// <summary>Appends a live-value write to the recorded values.</summary>
    public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc)
    {
        _valueWrites.Enqueue((nodeId, value, quality, sourceTimestampUtc));
    }

    /// <summary>Ignored: alarm writes are not exercised by this suite.</summary>
    public void WriteAlarmCondition(string alarmNodeId, AlarmConditionSnapshot state, DateTime sourceTimestampUtc)
    {
    }

    /// <summary>Ignored: alarm materialisation is not exercised by this suite.</summary>
    public void MaterialiseAlarmCondition(string alarmNodeId, string equipmentNodeId, string displayName, string alarmType, int severity, bool isNative = false)
    {
    }

    /// <summary>Appends an EnsureFolder call to the recorded folders.</summary>
    public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName)
    {
        _folderCalls.Enqueue((folderNodeId, parentNodeId, displayName));
    }

    /// <summary>Appends an EnsureVariable call to the recorded variables; the historian tag name and
    /// array shape arguments are not recorded.</summary>
    public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null)
    {
        _variableCalls.Enqueue((variableNodeId, parentFolderNodeId, displayName, dataType, writable));
    }

    /// <summary>Counts a raw rebuild (the apply-time fallback when no dbFactory is wired).</summary>
    public void RebuildAddressSpace()
    {
        Interlocked.Increment(ref RebuildCalls);
    }

    /// <summary>Appends the affected node id of a NodeAdded model-change announcement.</summary>
    public void RaiseNodesAddedModelChange(string affectedNodeId)
    {
        _nodeAddedAnnouncements.Enqueue(affectedNodeId);
    }
}
}
@@ -0,0 +1,511 @@
using Akka.Actor;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using ZB.MOM.WW.OtOpcUa.Runtime.Tests.Harness;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
/// <summary>
/// Covers the bounded post-connect re-discovery loop: when an <see cref="ITagDiscovery"/> driver
/// reaches Connected, <see cref="DriverInstanceActor"/> runs repeated discovery passes (FOCAS-style:
/// the FixedTree is suppressed until the driver's cache populates ~0-2s after connect) and ships each
/// pass's captured nodes to its parent as <see cref="DriverInstanceActor.DiscoveredNodesReady"/>. The
/// loop STOPS once the non-empty discovered set stabilises (or the attempt cap is hit) — it must not
/// spin forever. A driver that does not implement <see cref="ITagDiscovery"/> produces no passes at all.
/// </summary>
[Trait("Category", "Unit")]
public sealed class DriverInstanceActorDiscoveryTests : RuntimeActorTestBase
{
/// <summary>
/// Warm-up then settle: the stub reports nothing on its first two passes and the same three nodes from
/// the third pass on. Every pass is published to the parent; once the non-empty set repeats the loop
/// ends, so exactly four <see cref="DriverInstanceActor.DiscoveredNodesReady"/> messages arrive, the
/// last one carrying the three nodes, and nothing follows — the loop is bounded.
/// </summary>
[Fact]
public void Discovery_retries_until_set_stabilises_then_stops()
{
    const int expectedPasses = 4; // node counts per pass: 0, 0, 3, 3

    var stub = new DiscoverableStubDriver();
    var probe = CreateTestProbe();
    // A 20 ms rediscover interval keeps the bounded retry well under a second (no real-time waits).
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    // Connecting → Connected; entering Connected kicks discovery.
    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // One DiscoveredNodesReady per pass, collected in arrival order.
    var published = Enumerable.Range(0, expectedPasses)
        .Select(_ => probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2)))
        .ToList();

    // Once the non-empty set has repeated there must be no fifth pass.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));

    // Passes 1 and 2: cache still populating, nothing discovered.
    published[0].Nodes.Count.ShouldBe(0);
    published[1].Nodes.Count.ShouldBe(0);
    // Pass 3: the set appears; pass 4: it repeats unchanged.
    published[2].Nodes.Count.ShouldBe(3);
    var last = published[published.Count - 1];
    last.Nodes.Count.ShouldBe(3);
    last.DriverInstanceId.ShouldBe(stub.DriverInstanceId);
    last.Nodes.Select(node => node.FullReference).ShouldBe(new[] { "m.fixed.v0", "m.fixed.v1", "m.fixed.v2" });

    // Driver invocations match published messages — no extra zombie pass.
    stub.DiscoverCount.ShouldBe(4);
}
/// <summary>
/// A driver lacking <see cref="ITagDiscovery"/> never triggers a discovery pass: after the actor has
/// initialised the driver, the parent still receives no
/// <see cref="DriverInstanceActor.DiscoveredNodesReady"/>.
/// </summary>
[Fact]
public void Driver_without_ITagDiscovery_produces_no_discovery()
{
    // SubscribableStubDriver is IDriver + ISubscribable but NOT ITagDiscovery.
    var stub = new SubscribableStubDriver();
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Wait until the driver has actually been initialised before checking for silence.
    AwaitCondition(() => stub.InitializeCount > 0, TimeSpan.FromSeconds(2));

    // No discovery capability ⇒ nothing is ever published to the parent.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
}
/// <summary>
/// Each return to Connected starts a fresh bounded discovery loop. After the initial loop has settled,
/// <see cref="DriverInstanceActor.ForceReconnect"/> sends the actor through Reconnecting and back to
/// Connected (via the auto-retry timer), and a new pass is published — keeping the injected tree current
/// if the backend's capabilities changed across the reconnect. Per the original note the new init bumps
/// the generation so pre-reconnect ticks would be discarded; none are in flight here because the first
/// loop has already stopped.
/// </summary>
[Fact]
public void Discovery_reruns_after_reconnect()
{
    var stub = new DiscoverableStubDriver();
    var probe = CreateTestProbe();
    // Short reconnect + rediscover intervals keep the reconnect-then-rediscover cycle fast.
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub,
        reconnectInterval: TimeSpan.FromMilliseconds(50),
        rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Consume the four settling passes (0,0,3,3), then verify the first loop has gone quiet.
    for (var pass = 0; pass < 4; pass++)
    {
        probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    }
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    var countBeforeReconnect = stub.DiscoverCount; // 4

    // Connected → Reconnecting → (auto retry-connect) → Connected again.
    sut.Tell(new DriverInstanceActor.ForceReconnect());

    // The stub is past its warm-up passes, so the first post-reconnect pass already has all 3 nodes.
    var postReconnect = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(3));
    postReconnect.Nodes.Count.ShouldBe(3);
    postReconnect.DriverInstanceId.ShouldBe(stub.DriverInstanceId);

    // The pass counter advanced: a genuinely new loop ran rather than a replay of the old one.
    stub.DiscoverCount.ShouldBeGreaterThan(countBeforeReconnect);
}
/// <summary>
/// Regression guard for the Critical: when <c>DiscoverAsync</c> completes ASYNCHRONOUSLY (off the actor
/// thread) the actor must still publish <see cref="DriverInstanceActor.DiscoveredNodesReady"/>. The
/// handler uses <c>Context.Parent</c> and <c>Timers</c> after awaiting discovery; had it awaited with
/// <c>ConfigureAwait(false)</c>, the continuation would run without an actor context, those calls would
/// throw <c>NotSupportedException("no active ActorContext")</c>, and no message would arrive. Stubs that
/// return <c>Task.CompletedTask</c> hide the problem, so this test uses a stub that truly yields
/// (modelled on <c>SubscribableStubDriver.UnsubscribeYields</c>).
/// </summary>
[Fact]
public void Async_completing_discovery_resumes_on_actor_context_and_publishes()
{
    var stub = new YieldingDiscoverableStubDriver();
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Correct handler: resumes on the actor context and publishes a non-empty set.
    // Broken handler: faults at Context.Parent.Tell, so this ExpectMsg times out.
    var firstPass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    firstPass.Nodes.Count.ShouldBe(3);
    firstPass.DriverInstanceId.ShouldBe(stub.DriverInstanceId);
}
/// <summary>
/// A set that never settles is still bounded: the stub yields 1, 2, 3, … nodes on successive passes, so
/// its signature never repeats and only <c>rediscoverMaxAttempts</c> can end the loop. With the cap at 3,
/// exactly three passes are published and then the stream stops.
/// </summary>
[Fact]
public void Never_stabilising_discovery_is_bounded_by_the_attempt_cap()
{
    var stub = new GrowingDiscoverableStubDriver();
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20), rediscoverMaxAttempts: 3));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Collect the three capped passes in arrival order.
    var published = Enumerable.Range(0, 3)
        .Select(_ => probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2)))
        .ToList();

    // Cap hit: no fourth pass although the set never stabilised.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));

    // The set really did grow on every capped pass (1, 2, 3 nodes).
    published.Select(message => message.Nodes.Count).ShouldBe(new[] { 1, 2, 3 });
    stub.DiscoverCount.ShouldBe(3);
}
/// <summary>
/// <see cref="DiscoveryRediscoverPolicy.Never"/> as the driver's <see cref="ITagDiscovery.RediscoverPolicy"/>
/// disables post-connect discovery altogether: the Connected-entry kick returns before the first tick is
/// scheduled, so <c>DiscoverAsync</c> is never invoked and no
/// <see cref="DriverInstanceActor.DiscoveredNodesReady"/> reaches the parent.
/// </summary>
[Fact]
public void Discovery_policy_Never_runs_no_passes_and_publishes_nothing()
{
    var stub = new DiscoverableStubDriver(DiscoveryRediscoverPolicy.Never);
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // The connect has happened (the discovery decision is taken on the Connected entry)...
    AwaitCondition(() => stub.InitializeCount > 0, TimeSpan.FromSeconds(2));

    // ...yet with policy=Never no pass runs and nothing is published.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    stub.DiscoverCount.ShouldBe(0);
}
/// <summary>
/// With <see cref="DiscoveryRediscoverPolicy.Once"/> as the driver's
/// <see cref="ITagDiscovery.RediscoverPolicy"/>, a single post-connect pass runs even though the stub's
/// set would grow forever (under <c>UntilStable</c> that never-repeating signature would retry up to the
/// attempt cap). One <see cref="DriverInstanceActor.DiscoveredNodesReady"/> is published and no further
/// <c>RediscoverTick</c> is scheduled.
/// </summary>
[Fact]
public void Discovery_policy_Once_publishes_exactly_one_pass_even_when_set_keeps_growing()
{
    var stub = new GrowingDiscoverableStubDriver(DiscoveryRediscoverPolicy.Once);
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // The single pass: first pass of the growing stub ⇒ 1 node.
    var solePass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    solePass.Nodes.Count.ShouldBe(1);
    solePass.DriverInstanceId.ShouldBe(stub.DriverInstanceId);

    // No second tick follows, even though UntilStable would have kept going.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    stub.DiscoverCount.ShouldBe(1);
}
/// <summary>
/// <see cref="DiscoveryRediscoverPolicy.Once"/> is one pass PER (re)connect cycle, not one pass ever.
/// Once the initial single pass has settled, <see cref="DriverInstanceActor.ForceReconnect"/> takes the
/// actor through Reconnecting and back to Connected (auto retry-connect timer), where <c>StartDiscovery</c>
/// re-kicks discovery and must run EXACTLY ONE more pass rather than the full attempt cap. The
/// ever-growing stub with a cap of 3 makes a (wrong) policy-ignoring loop visible: it would publish three
/// passes per connect. Guards the StartDiscovery-on-reconnect path that the follow-on TriggerRediscovery
/// task touches.
/// </summary>
[Fact]
public void Discovery_policy_Once_reruns_one_pass_on_reconnect()
{
    var stub = new GrowingDiscoverableStubDriver(DiscoveryRediscoverPolicy.Once);
    var probe = CreateTestProbe();
    // Short intervals for speed; cap 3 so a (wrong) full loop is clearly more than Once's single pass.
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub,
        reconnectInterval: TimeSpan.FromMilliseconds(50),
        rediscoverInterval: TimeSpan.FromMilliseconds(20),
        rediscoverMaxAttempts: 3));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Initial connect: exactly one pass (growing set → 1 node), then quiet.
    var initialPass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    initialPass.Nodes.Count.ShouldBe(1);
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    stub.DiscoverCount.ShouldBe(1);

    // Connected → Reconnecting → (auto retry-connect) → Connected again.
    sut.Tell(new DriverInstanceActor.ForceReconnect());

    // Exactly ONE additional pass after the reconnect. The same stub instance keeps counting, so this
    // second pass yields 2 nodes.
    var postReconnect = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(3));
    postReconnect.Nodes.Count.ShouldBe(2);
    postReconnect.DriverInstanceId.ShouldBe(stub.DriverInstanceId);

    // Nothing further: Once did not run up to the attempt cap on the reconnect path.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    stub.DiscoverCount.ShouldBe(2);
}
/// <summary>
/// The per-pass discovery timeout can be injected through <see cref="DriverInstanceActor.Props"/> so
/// tests need no real-time delays, while the default constant stays at 30 seconds
/// (behaviour-preserving). The wiring is checked by building the actor via <c>Props</c> with a custom
/// timeout and confirming it starts and publishes a discovery pass as usual.
/// </summary>
[Fact]
public void Discovery_timeout_default_constant_is_30s_and_Props_accepts_custom_value()
{
    // The default must still equal the pre-refactor 30 s literal.
    DriverInstanceActor.DefaultRediscoverDiscoverTimeout.ShouldBe(TimeSpan.FromSeconds(30));

    // Props takes the optional timeout parameter without throwing, and the actor starts normally.
    var stub = new DiscoverableStubDriver();
    var probe = CreateTestProbe();
    var props = DriverInstanceActor.Props(
        stub,
        rediscoverInterval: TimeSpan.FromMilliseconds(20),
        rediscoverDiscoverTimeout: TimeSpan.FromSeconds(5));
    var sut = probe.ChildActorOf(props);

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // A published pass confirms the custom timeout was wired in without error.
    probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
}
/// <summary>
/// A <see cref="DriverInstanceActor.TriggerRediscovery"/> arriving while Connected restarts the
/// post-connect discovery loop: once the initial discovery has settled, the message causes a FRESH pass,
/// so <c>DiscoverCount</c> advances and another <see cref="DriverInstanceActor.DiscoveredNodesReady"/> is
/// published. <see cref="DriverHostActor"/> sends this message to re-run discovery after rebinding the
/// driver to a new equipment.
/// </summary>
[Fact]
public void TriggerRediscovery_when_Connected_reruns_discovery()
{
    // Once-policy growing stub ⇒ exactly one pass per (re)kick, so the trigger's effect can be pinned
    // with one ExpectMsg plus one ExpectNoMsg (no settling pass to drain, no stale-tick double pass).
    var stub = new GrowingDiscoverableStubDriver(DiscoveryRediscoverPolicy.Once);
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Initial connect: one pass (growing set → 1 node), then it settles.
    var initialPass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    initialPass.Nodes.Count.ShouldBe(1);
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    var countBeforeTrigger = stub.DiscoverCount; // 1

    // Re-kick discovery: Once ⇒ exactly one fresh pass (growing set → 2 nodes).
    sut.Tell(new DriverInstanceActor.TriggerRediscovery());

    var triggeredPass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    triggeredPass.Nodes.Count.ShouldBe(2);
    triggeredPass.DriverInstanceId.ShouldBe(stub.DriverInstanceId);

    // Precisely one new pass: the counter moved by one and nothing else arrived.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    stub.DiscoverCount.ShouldBe(countBeforeTrigger + 1);
}
/// <summary>
/// <see cref="DriverInstanceActor.TriggerRediscovery"/> does NOT re-discover when the driver's
/// <see cref="ITagDiscovery.RediscoverPolicy"/> is <see cref="DiscoveryRediscoverPolicy.Never"/>: the
/// handler calls <c>StartDiscovery</c>, which returns early for <c>Never</c>, so no pass runs and nothing
/// is published — the same opt-out as on the Connected entry.
/// </summary>
[Fact]
public void TriggerRediscovery_with_policy_Never_does_not_rediscover()
{
    var stub = new DiscoverableStubDriver(DiscoveryRediscoverPolicy.Never);
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub, rediscoverInterval: TimeSpan.FromMilliseconds(20)));

    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));
    AwaitCondition(() => stub.InitializeCount > 0, TimeSpan.FromSeconds(2));

    // Connected, but policy=Never: StartDiscovery's early return absorbs the trigger.
    sut.Tell(new DriverInstanceActor.TriggerRediscovery());

    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    stub.DiscoverCount.ShouldBe(0);
}
/// <summary>
/// Outside Connected, <see cref="DriverInstanceActor.TriggerRediscovery"/> is a clean silent no-op: no
/// pass runs, nothing is published, and the actor neither crashes nor dies (its eventual (re)connect
/// re-discovers anyway). Two non-Connected states are covered — <c>Connecting</c> (before init
/// completes) and <c>Reconnecting</c> (after <see cref="DriverInstanceActor.ForceReconnect"/>, held there
/// by a long reconnect interval) — with a successful connect in between showing the actor is unharmed.
/// </summary>
[Fact]
public void TriggerRediscovery_when_not_Connected_is_a_silent_noop()
{
    // Once-growing stub: a successful connect publishes exactly one pass (clean confirmation of state).
    // The 30 s reconnect interval parks the actor in Reconnecting for the whole test window.
    var stub = new GrowingDiscoverableStubDriver(DiscoveryRediscoverPolicy.Once);
    var probe = CreateTestProbe();
    var sut = probe.ChildActorOf(DriverInstanceActor.Props(
        stub,
        reconnectInterval: TimeSpan.FromSeconds(30),
        rediscoverInterval: TimeSpan.FromMilliseconds(20)));
    Watch(sut);

    // (1) Connecting: the actor boots into Connecting, so a trigger sent BEFORE InitializeRequested is
    // handled in that non-Connected state.
    sut.Tell(new DriverInstanceActor.TriggerRediscovery());

    // Nothing was discovered and no Terminated reached the watching test actor.
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    ExpectNoMsg(TimeSpan.FromMilliseconds(100));
    stub.DiscoverCount.ShouldBe(0);

    // Connecting now proves the earlier trigger left the actor working; Once ⇒ one pass.
    sut.Tell(new DriverInstanceActor.InitializeRequested("{}"));
    var connectPass = probe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    connectPass.Nodes.Count.ShouldBe(1);
    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    var countAfterConnect = stub.DiscoverCount; // 1

    // (2) Reconnecting: ForceReconnect parks the actor there (30 s retry ⇒ no auto reconnect within the
    // window). Messages are processed in order, so the trigger is handled while Reconnecting.
    sut.Tell(new DriverInstanceActor.ForceReconnect());
    sut.Tell(new DriverInstanceActor.TriggerRediscovery());

    probe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
    ExpectNoMsg(TimeSpan.FromMilliseconds(100)); // still alive — no Terminated
    stub.DiscoverCount.ShouldBe(countAfterConnect); // no fresh pass while Reconnecting
}
/// <summary>
/// <see cref="StubDriver"/> extended with <see cref="ITagDiscovery"/>. Every <c>DiscoverAsync</c> call is
/// counted; passes 1-2 produce no variables (cache warming) and passes 3+ produce the same 3-variable
/// set — modelling FOCAS, whose FixedTree appears a few seconds after connect and then stays put.
/// </summary>
private sealed class DiscoverableStubDriver : StubDriver, ITagDiscovery
{
    private int _passCount;

    /// <summary>Creates the fake with the <see cref="DiscoveryRediscoverPolicy"/> it should report;
    /// the default is <see cref="DiscoveryRediscoverPolicy.UntilStable"/> (the interface default), which
    /// leaves the existing UntilStable tests unaffected.</summary>
    /// <param name="policy">Policy returned from <see cref="RediscoverPolicy"/>.</param>
    public DiscoverableStubDriver(DiscoveryRediscoverPolicy policy = DiscoveryRediscoverPolicy.UntilStable)
    {
        RediscoverPolicy = policy;
    }

    /// <summary>The post-connect re-discovery policy this fake reports to the actor.</summary>
    public DiscoveryRediscoverPolicy RediscoverPolicy { get; }

    /// <summary>How many <see cref="DiscoverAsync"/> passes have been driven so far.</summary>
    public int DiscoverCount
    {
        get { return Volatile.Read(ref _passCount); }
    }

    /// <summary>Adds a "FixedTree" folder and, from the third pass on, three Float64 variables beneath
    /// it (variable counts per pass: 0,0,3,3,…). Completes synchronously.</summary>
    public Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        var passNumber = Interlocked.Increment(ref _passCount); // 1-based pass number

        var variableCount = 0;
        if (passNumber >= 3)
        {
            variableCount = 3;
        }

        var folder = builder.Folder("FixedTree", "FixedTree");
        foreach (var index in Enumerable.Range(0, variableCount))
        {
            var attributes = new DriverAttributeInfo(
                FullName: $"m.fixed.v{index}",
                DriverDataType: DriverDataType.Float64,
                IsArray: false,
                ArrayDim: null,
                SecurityClass: SecurityClassification.ViewOnly,
                IsHistorized: false);
            folder.Variable($"v{index}", $"v{index}", attributes);
        }

        return Task.CompletedTask;
    }
}
/// <summary>
/// A discoverable driver whose <c>DiscoverAsync</c> genuinely SUSPENDS and resumes on a thread-pool
/// thread that carries NO Akka actor cell — modelled on <c>SubscribableStubDriver.UnsubscribeYields</c>.
/// This forces the actor's <c>await DiscoverAsync(...)</c> continuation to resume off-context unless the
/// handler omits <c>ConfigureAwait(false)</c>, so it is a deterministic repro of the no-ActorContext
/// race. Returns a stable 3-node set on every pass.
/// </summary>
private sealed class YieldingDiscoverableStubDriver : StubDriver, ITagDiscovery
{
    /// <summary>Suspends on a TCS completed from a background thread, then streams 3 nodes.</summary>
    /// <param name="builder">Receives the "FixedTree" folder and its three Float64 variables.</param>
    /// <param name="cancellationToken">Accepted for interface compatibility; deliberately not used to
    /// gate the background completion (see the note in the body).</param>
    /// <returns>A task that completes after the off-thread resume and the three variables are added.</returns>
    public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        var resumeSignal = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

        // The token is intentionally NOT handed to Task.Run: if it were cancelled before the work item
        // started, the delegate would never execute, the TCS would never complete, and this method (and
        // the actor's discovery pass awaiting it) would hang forever.
        _ = Task.Run(() => resumeSignal.SetResult());

        await resumeSignal.Task.ConfigureAwait(false); // resume on a clean thread-pool thread (no actor cell)

        var fixedTree = builder.Folder("FixedTree", "FixedTree");
        for (var i = 0; i < 3; i++)
        {
            fixedTree.Variable($"v{i}", $"v{i}", new DriverAttributeInfo(
                FullName: $"m.fixed.v{i}",
                DriverDataType: DriverDataType.Float64,
                IsArray: false,
                ArrayDim: null,
                SecurityClass: SecurityClassification.ViewOnly,
                IsHistorized: false));
        }
    }
}
/// <summary>
/// Discoverable fake whose set NEVER settles: pass N produces N variables (1,2,3,…), so the
/// full-reference signature changes on every pass and only the attempt cap can bound the loop.
/// </summary>
private sealed class GrowingDiscoverableStubDriver : StubDriver, ITagDiscovery
{
    private int _passCount;

    /// <summary>Creates the fake with the <see cref="DiscoveryRediscoverPolicy"/> it should report;
    /// the default is <see cref="DiscoveryRediscoverPolicy.UntilStable"/> (the interface default), which
    /// leaves the existing attempt-cap test unaffected. Under <see cref="DiscoveryRediscoverPolicy.Once"/>
    /// the ever-growing set shows the actor stopping after one pass (UntilStable would keep retrying).</summary>
    /// <param name="policy">Policy returned from <see cref="RediscoverPolicy"/>.</param>
    public GrowingDiscoverableStubDriver(DiscoveryRediscoverPolicy policy = DiscoveryRediscoverPolicy.UntilStable)
    {
        RediscoverPolicy = policy;
    }

    /// <summary>The post-connect re-discovery policy this fake reports to the actor.</summary>
    public DiscoveryRediscoverPolicy RediscoverPolicy { get; }

    /// <summary>How many <see cref="DiscoverAsync"/> passes have been driven so far.</summary>
    public int DiscoverCount
    {
        get { return Volatile.Read(ref _passCount); }
    }

    /// <summary>Adds a "FixedTree" folder holding as many Float64 variables as the 1-based pass number
    /// (pass N → N variables). Completes synchronously.</summary>
    public Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        var passNumber = Interlocked.Increment(ref _passCount); // 1-based pass number
        var folder = builder.Folder("FixedTree", "FixedTree");

        foreach (var index in Enumerable.Range(0, passNumber))
        {
            var attributes = new DriverAttributeInfo(
                FullName: $"m.fixed.v{index}",
                DriverDataType: DriverDataType.Float64,
                IsArray: false,
                ArrayDim: null,
                SecurityClass: SecurityClassification.ViewOnly,
                IsHistorized: false);
            folder.Variable($"v{index}", $"v{index}", attributes);
        }

        return Task.CompletedTask;
    }
}
}
@@ -222,5 +222,7 @@ public sealed class OtOpcUaTelemetryHookTests : RuntimeActorTestBase
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) { }
/// <summary>Rebuild hook with an intentionally empty body: this stub does no work of its own — per the
/// inline note, the rebuild is recorded via a span rather than by this method.</summary>
public void RebuildAddressSpace() { /* recorded via span */ }
/// <summary>Announces a NodeAdded model-change (stub implementation: the call is ignored).</summary>
/// <param name="affectedNodeId">The affected node id supplied by the caller; unused by this stub.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
}
}
@@ -361,6 +361,9 @@ public sealed class OpcUaPublishActorRebuildTests : RuntimeActorTestBase
=> Calls.Enqueue($"EV:{variableNodeId}");
/// <summary>Counts a rebuild-address-space call by atomically incrementing <c>RebuildCalls</c>.</summary>
public void RebuildAddressSpace()
{
    Interlocked.Increment(ref RebuildCalls);
}
/// <summary>Logs a NodeAdded model-change announcement as an <c>NA:</c>-prefixed entry in <c>Calls</c>.</summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId)
{
    Calls.Enqueue($"NA:{affectedNodeId}");
}
/// <summary>Records a surgical in-place tag-attribute update (always succeeds in this recording sink).</summary>
public bool UpdateTagAttributes(string variableNodeId, bool writable, string? historianTagname, string dataType, bool isArray, uint? arrayLength)
{
@@ -1,10 +1,12 @@
using System.Collections.Concurrent;
using Akka.Actor;
using Microsoft.Extensions.Logging.Abstractions;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Redundancy;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
using ZB.MOM.WW.OtOpcUa.Runtime.Health;
using ZB.MOM.WW.OtOpcUa.Runtime.OpcUa;
using ZB.MOM.WW.OtOpcUa.Runtime.Tests.Harness;
@@ -98,6 +100,35 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
AwaitAssert(() => sink.RebuildCalls.ShouldBe(1), duration: TimeSpan.FromMilliseconds(500));
}
/// <summary>
/// Sends <see cref="OpcUaPublishActor.MaterialiseDiscoveredNodes"/> to a publish actor wired with a real
/// <see cref="AddressSpaceApplier"/> over a recording sink, and asserts that the sink sees the discovered
/// folder, the (read-only) variable, and a NodeAdded model-change for the equipment root — covering the
/// message → handler → applier → sink path end to end (mirrors the real-applier-over-recording-sink
/// harness in <c>OpcUaPublishActorRebuildTests</c>).
/// </summary>
[Fact]
public void MaterialiseDiscoveredNodes_routes_through_applier_to_sink()
{
    var recorder = new RecordingSink();
    var realApplier = new AddressSpaceApplier(recorder, NullLogger<AddressSpaceApplier>.Instance);
    var publisher = Sys.ActorOf(OpcUaPublishActor.PropsForTests(sink: recorder, applier: realApplier));

    // One folder under the equipment root and one read-only scalar variable inside that folder.
    var axesFolder = new DiscoveredFolder("EQ-1/Axes", "EQ-1", "Axes");
    var xVariable = new DiscoveredVariable("EQ-1/Axes/X", "EQ-1/Axes", "X", "Double",
        Writable: false, IsArray: false, ArrayLength: null);
    var discoveredFolders = new[] { axesFolder };
    var discoveredVariables = new[] { xVariable };

    publisher.Tell(new OpcUaPublishActor.MaterialiseDiscoveredNodes("EQ-1", discoveredFolders, discoveredVariables));

    // The actor handles the message asynchronously, so poll until all three effects are visible.
    AwaitAssert(
        () =>
        {
            recorder.Folders.ShouldContain(("EQ-1/Axes", "EQ-1", "Axes"));
            recorder.Variables.ShouldContain(("EQ-1/Axes/X", "EQ-1/Axes", "X", "Double", false));
            recorder.ModelChanges.ShouldContain("EQ-1");
        },
        duration: TimeSpan.FromMilliseconds(500));
}
/// <summary>Verifies that ServiceLevelChanged publishes to IServiceLevelPublisher once per unique level.</summary>
[Fact]
public void ServiceLevelChanged_publishes_to_IServiceLevelPublisher_once_per_unique_level()
@@ -548,6 +579,12 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
public ConcurrentQueue<(string AlarmNodeId, AlarmConditionSnapshot State, DateTime Ts)> AlarmQueue { get; } =
    new ConcurrentQueue<(string AlarmNodeId, AlarmConditionSnapshot State, DateTime Ts)>();
/// <summary>Count of rebuild calls. This is a plain field rather than a property because
/// <c>RebuildAddressSpace</c> passes it by <c>ref</c> to <c>Interlocked.Increment</c>.</summary>
public int RebuildCalls;
/// <summary>Gets the queue that <c>EnsureFolder</c> appends to, one entry per call.</summary>
public ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName)> FolderQueue { get; } =
    new ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName)>();
/// <summary>Gets the queue that <c>EnsureVariable</c> appends to, one entry per call.</summary>
public ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName, string DataType, bool Writable)> VariableQueue { get; } =
    new ConcurrentQueue<(string NodeId, string? ParentNodeId, string DisplayName, string DataType, bool Writable)>();
/// <summary>Gets the queue that <c>RaiseNodesAddedModelChange</c> appends to, one affected node id per announcement.</summary>
public ConcurrentQueue<string> ModelChangeQueue { get; } = new ConcurrentQueue<string>();
/// <summary>Gets the list of recorded value updates.</summary>
public List<(string NodeId, object? Value, OpcUaQuality Quality, DateTime Ts)> Values =>
@@ -555,6 +592,14 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
/// <summary>Gets a freshly copied list of the alarm condition updates currently held in <c>AlarmQueue</c>.</summary>
public List<(string AlarmNodeId, AlarmConditionSnapshot State, DateTime Ts)> Alarms
{
    get { return AlarmQueue.ToList(); }
}
/// <summary>Gets a freshly copied list of the EnsureFolder calls currently held in <c>FolderQueue</c>.</summary>
public List<(string NodeId, string? ParentNodeId, string DisplayName)> Folders
{
    get { return FolderQueue.ToList(); }
}
/// <summary>Gets a freshly copied list of the EnsureVariable calls currently held in <c>VariableQueue</c>.</summary>
public List<(string NodeId, string? ParentNodeId, string DisplayName, string DataType, bool Writable)> Variables
{
    get { return VariableQueue.ToList(); }
}
/// <summary>Gets a freshly copied list of the node ids currently held in <c>ModelChangeQueue</c>.</summary>
public List<string> ModelChanges
{
    get { return ModelChangeQueue.ToList(); }
}
/// <summary>Records a value update.</summary>
/// <param name="nodeId">The OPC UA node identifier.</param>
@@ -579,23 +624,29 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
/// <param name="severity">The domain severity.</param>
public void MaterialiseAlarmCondition(string alarmNodeId, string equipmentNodeId, string displayName, string alarmType, int severity, bool isNative = false)
{
    // Intentionally empty: this sink does not record alarm-condition materialisation.
}
/// <summary>Records a folder ensure call by appending its arguments to <c>FolderQueue</c>.</summary>
/// <param name="folderNodeId">The OPC UA folder node identifier.</param>
/// <param name="parentNodeId">The parent folder node identifier, or null for root.</param>
/// <param name="displayName">The display name of the folder.</param>
public void EnsureFolder(string folderNodeId, string? parentNodeId, string displayName) =>
    FolderQueue.Enqueue((folderNodeId, parentNodeId, displayName));
/// <summary>Records a variable ensure call by appending the node id, parent, display name, data type and
/// writable flag to <c>VariableQueue</c>.</summary>
/// <param name="variableNodeId">The OPC UA variable node identifier.</param>
/// <param name="parentFolderNodeId">The parent folder node identifier, or null for root.</param>
/// <param name="displayName">The display name of the variable.</param>
/// <param name="dataType">The OPC UA built-in type name.</param>
/// <param name="writable">Whether the node is created read/write.</param>
/// <param name="historianTagname">The resolved historian tagname (null ⇒ not historized); accepted but not recorded by this sink.</param>
/// <param name="isArray">Whether the variable is an array; accepted but not recorded by this sink.</param>
/// <param name="arrayLength">The array length, if any; accepted but not recorded by this sink.</param>
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable, string? historianTagname = null, bool isArray = false, uint? arrayLength = null) =>
    VariableQueue.Enqueue((variableNodeId, parentFolderNodeId, displayName, dataType, writable));
/// <summary>Counts one more rebuild request by atomically incrementing <c>RebuildCalls</c>.</summary>
public void RebuildAddressSpace()
{
    Interlocked.Increment(ref RebuildCalls);
}
/// <summary>Appends the affected node id of a NodeAdded model-change announcement to <c>ModelChangeQueue</c>.</summary>
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
public void RaiseNodesAddedModelChange(string affectedNodeId)
{
    ModelChangeQueue.Enqueue(affectedNodeId);
}
}
/// <summary>Test implementation of IServiceLevelPublisher that records publishes.</summary>