Compare commits
81 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1030d00b3f | |||
| 10a6ac6f3e | |||
| 60695179ee | |||
| 00cc1da362 | |||
| 111adc92b6 | |||
| b2276b5b04 | |||
| 9fca3d9c05 | |||
| 240c967576 | |||
| 44644ddc7f | |||
| 2982cc4bb5 | |||
| 2124f21ab6 | |||
| 0b4b2e4cfd | |||
| 245db98f5e | |||
| b32436902a | |||
| 2a5c717755 | |||
| 97528c500f | |||
| 82124ee4f8 | |||
| bbfbc7b215 | |||
| 8b4028de84 | |||
| 035bde0562 | |||
| 22711444cc | |||
| 0be79219fc | |||
| 8559905e8a | |||
| d3081a659f | |||
| 555bd477f1 | |||
| 1a6eb7efe6 | |||
| 36f7c3c5bf | |||
| 1d5fa8230e | |||
| 718f1fdad2 | |||
| 35aace7fdf | |||
| 0a540d9f09 | |||
| 1e93b2ebfb | |||
| c51ca2276b | |||
| a96e85f0e4 | |||
| a54c7a9366 | |||
| c7296d7458 | |||
| 3226b87818 | |||
| c822a6b196 | |||
| a98fc46d26 | |||
| 369e832e5a | |||
| a7dd2f59d0 | |||
| 04159fd716 | |||
| 1058542d80 | |||
| 0074f37a64 | |||
| 50f08635ec | |||
| 51721df563 | |||
| 05c820795a | |||
| cde16063d9 | |||
| 533671487e | |||
| adcd7b57c1 | |||
| 915492a759 | |||
| cb7ce7f171 | |||
| e7d5ebe956 | |||
| f7358bf4fd | |||
| a1a655e6c9 | |||
| ce34816a50 | |||
| efbdaf853c | |||
| a378b572af | |||
| c2c368dcec | |||
| 37cac5dee5 | |||
| 25ccd25b6b | |||
| 5104540e32 | |||
| 1aa13ebd27 | |||
| 0788bad145 | |||
| b1e4fba792 | |||
| 21298ec1b2 | |||
| b9b8d3d389 | |||
| cf6b1abf4c | |||
| 51634cca38 | |||
| bb21db0a8e | |||
| 34fc304712 | |||
| ccf93fc029 | |||
| 598cdfad5a | |||
| f8406d348c | |||
| 93f7586590 | |||
| 33b0e639a5 | |||
| d7a0da5ea1 | |||
| da55c6913d | |||
| 74e07225c9 | |||
| 6600ce9940 | |||
| 235b8b8e6d |
@@ -112,7 +112,7 @@ lmxopcua-fix sync modbus # rsync this repo's tests/.../Docker/
|
||||
|
||||
Override any endpoint via the env var to point at a real PLC. The local OtOpcUa server runs on this VM at `opc.tcp://localhost:4840` — **that's not on the docker host**.
|
||||
|
||||
**Local docker-dev rig — login is DISABLED, so do live `/run` verification yourself (don't wait for the user to sign in).** The local `docker-dev/docker-compose.yml` stack (AdminUI at `http://localhost:9200` via Traefik; OPC UA `opc.tcp://localhost:4840` central-1 / `:4841` central-2) runs the AdminUI with `Security__Auth__DisableLogin: "true"` — **no sign-in form; it's auto-authenticated as a full-access admin.** So AdminUI / Razor `/run` verification (deploy a config, drive a page, confirm behavior — e.g. via the Chrome browser-automation tools against `http://localhost:9200`) does **not** require the user to log in. Run it yourself; do not defer it as "user-driven sign-in required." (Caveat: OPC UA *data-plane* auth is still real LDAP against the shared GLAuth on `10.100.0.35:3893` — that only gates Client.CLI read/write **role** operations, e.g. binding a `multi-role` / `opc-writeop` user, and is independent of the AdminUI login. Things genuinely outside the local rig — real PLCs, or the AVEVA Historian + Wonderware sidecar on `10.100.0.48` — still need the user.)
|
||||
**Local docker-dev rig — login is DISABLED, so do live `/run` verification yourself (don't wait for the user to sign in).** The local `docker-dev/docker-compose.yml` stack (AdminUI at `http://localhost:9200` via Traefik; OPC UA `opc.tcp://localhost:4840` central-1 / `:4841` central-2) runs the AdminUI with `Security__Auth__DisableLogin: "true"` — **no sign-in form; it's auto-authenticated as a full-access admin.** So AdminUI / Razor `/run` verification (deploy a config, drive a page, confirm behavior — e.g. via the Chrome browser-automation tools against `http://localhost:9200`) does **not** require the user to log in. Run it yourself; do not defer it as "user-driven sign-in required." (Caveat: OPC UA *data-plane* auth is still real LDAP against the shared GLAuth on `10.100.0.35:3893` — that only gates Client.CLI read/write **role** operations, e.g. binding a `multi-role` / `opc-writeop` user, and is independent of the AdminUI login. Things genuinely outside the local rig — real PLCs, or the AVEVA Historian reached via the `ZB.MOM.WW.HistorianGateway` sidecar — still need the user.)
|
||||
|
||||
See `docs/v2/dev-environment.md` for the full inventory and rationale.
|
||||
|
||||
@@ -165,7 +165,7 @@ Address pickers in AdminUI support live browse for OpcUaClient and Galaxy driver
|
||||
|
||||
The AdminUI's global **UNS** page (`/uns`) is the single surface for managing the unified namespace fleet-wide (Area → Line → Equipment → Tag/VirtualTag), replacing the old per-cluster UNS/Equipment/Tags tabs. See `docs/Uns.md`.
|
||||
|
||||
The `/uns` **TagModal** uses **driver-typed tag-config editors**: it dispatches by the bound driver's `DriverType` to a per-driver editor (Modbus/S7/AbCip/AbLegacy/TwinCAT/Focas) via `TagConfigEditorMap`, with client-side validation via `TagConfigValidator`; unmapped drivers (OpcUaClient/Galaxy/Historian.Wonderware) fall back to the generic raw-`TagConfig`-JSON textarea. Each editor is a thin razor shell over a pure `<Driver>TagConfigModel` (`FromJson`/`ToJson`/`Validate`, preserves unknown keys). To add a driver's editor, copy the Modbus template under `Components/Shared/Uns/TagEditors/` + `Uns/TagEditors/`, reusing the driver's enums + camelCase JSON property names, and register it in `TagConfigEditorMap` + `TagConfigValidator`. See `docs/plans/2026-06-09-driver-typed-tag-editors-design.md`.
|
||||
The `/uns` **TagModal** uses **driver-typed tag-config editors**: it dispatches by the bound driver's `DriverType` to a per-driver editor (Modbus/S7/AbCip/AbLegacy/TwinCAT/Focas) via `TagConfigEditorMap`, with client-side validation via `TagConfigValidator`; unmapped drivers (OpcUaClient/Galaxy) fall back to the generic raw-`TagConfig`-JSON textarea. Each editor is a thin razor shell over a pure `<Driver>TagConfigModel` (`FromJson`/`ToJson`/`Validate`, preserves unknown keys). To add a driver's editor, copy the Modbus template under `Components/Shared/Uns/TagEditors/` + `Uns/TagEditors/`, reusing the driver's enums + camelCase JSON property names, and register it in `TagConfigEditorMap` + `TagConfigValidator`. See `docs/plans/2026-06-09-driver-typed-tag-editors-design.md`.
|
||||
|
||||
## Scripting / Script Editor
|
||||
|
||||
@@ -186,4 +186,131 @@ Inbound operator acknowledge/shelve for scripted alarms is fully implemented. Tw
|
||||
|
||||
## Historian / HistoryRead
|
||||
|
||||
Server-side OPC UA HistoryRead for historized equipment tags is implemented driver-agnostically in Phase C. A tag is historized by adding `"isHistorized": true` to its `TagConfig` JSON blob (authored in the raw-JSON textarea on the `/uns` TagModal); an optional `"historianTagname"` field overrides the default historian tagname, which is the tag's driver `FullName`. The server dispatches all history reads to the registered `IHistorianDataSource` (Wonderware historian TCP client) via the `ServerHistorian` appsettings section (`Enabled` defaults to `false`; when disabled, a `NullHistorianDataSource` is used and historized nodes return `GoodNoData` rather than an error). Supported variants: Raw, Processed (Average/Minimum/Maximum/Total/Count aggregates), and AtTime over historized variable nodes; Events over alarm-owning equipment-folder event-notifier nodes. Reads are ungated (served from any redundancy node); authorization uses the standard `AccessLevels.HistoryRead` bit set at materialization. See `docs/Historian.md` for the full guide.
|
||||
**Backend: HistorianGateway (sole historian backend).** As of the gateway-integration cutover, the
|
||||
historian read, alarm-write, and continuous-historization paths are all served by the
|
||||
**`ZB.MOM.WW.HistorianGateway`** sidecar, consumed as the Gitea-feed
|
||||
**`ZB.MOM.WW.HistorianGateway.Client`** gRPC package (`historian_gateway.v1`) behind a thin
|
||||
`IHistorianGatewayClient` seam in `ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway`. **The bespoke
|
||||
Wonderware TCP/ArchestrA sidecar projects and the vestigial `Historian.Wonderware` driver type were
|
||||
retired** — there is no Wonderware backend in the tree anymore (see `docs/drivers/Historian.Wonderware.md`,
|
||||
now a retired stub).
|
||||
|
||||
A tag is historized by adding `"isHistorized": true` to its `TagConfig` JSON blob (authored in the
|
||||
raw-JSON textarea on the `/uns` TagModal); an optional `"historianTagname"` field overrides the default
|
||||
historian tagname, which is the tag's driver `FullName`.
|
||||
|
||||
### Read path (`ServerHistorian` section)
|
||||
|
||||
The server dispatches all OPC UA HistoryRead to the registered `IHistorianDataSource` — the
|
||||
`GatewayHistorianDataSource` read client when enabled, else the `NullHistorianDataSource` default
|
||||
(historized nodes return `GoodNoData`, never an error). Supported variants: Raw, Processed
|
||||
(Average/Minimum/Maximum/Total/Count aggregates), and AtTime over historized variable nodes; Events over
|
||||
alarm-owning equipment-folder event-notifier nodes. Reads are ungated (served from any redundancy node);
|
||||
authorization uses the standard `AccessLevels.HistoryRead` bit set at materialization.
|
||||
|
||||
`ServerHistorian` appsettings keys (`ServerHistorianOptions`; `Enabled` defaults to `false`):
|
||||
|
||||
| Key | Default | Notes |
|
||||
|---|---|---|
|
||||
| `Enabled` | `false` | `true` registers the gateway read client; `false` keeps `NullHistorianDataSource` |
|
||||
| `Endpoint` | `""` | Absolute gateway URI, e.g. `https://host:5222`. Scheme selects transport (`https://` = TLS, `http://` = h2c) |
|
||||
| `ApiKey` | `""` | Peppered-HMAC key `histgw_<id>_<secret>` sent as `Authorization: Bearer`. **Supply via env `ServerHistorian__ApiKey` — never commit** |
|
||||
| `UseTls` | `true` | Connect over TLS; must match the `Endpoint` scheme |
|
||||
| `AllowUntrustedServerCertificate` | `false` | Accept a self-signed / untrusted server cert (dev / on-prem only) |
|
||||
| `CaCertificatePath` | `null` | PEM CA file pinning the gateway TLS chain; null/empty uses the OS trust store |
|
||||
| `CallTimeout` | `00:00:30` | Per-call deadline for each unary gateway read |
|
||||
| `MaxTieClusterOverfetch` | `65536` | Bounded over-fetch the HistoryRead-Raw paging uses to page within an oversized same-timestamp tie cluster (retained from the prior backend) |
|
||||
|
||||
### Alarm-history path (`AlarmHistorian` section)
|
||||
|
||||
Alarm events are written through `GatewayAlarmHistorianWriter` (the gateway **`SendEvent`** path) behind
|
||||
the durable **`SqliteStoreAndForwardSink`** — `AlarmHistorian:Enabled=true` swaps the `NullAlarmHistorianSink`
|
||||
default for the SQLite store-and-forward queue, whose drain worker forwards batches to the gateway and uses
|
||||
per-event outcomes to decide retry vs. dead-letter (never throws). The `AlarmHistorian` section carries
|
||||
only the `Enabled` gate + the SQLite knobs (`DatabasePath`, `DrainIntervalSeconds`, `Capacity`,
|
||||
`DeadLetterRetentionDays`, `BatchSize`, `MaxAttempts`) — the downstream gateway connection
|
||||
(endpoint/key/TLS) is sourced from the `ServerHistorian` section. **Alarm-history `ReadEvents` requires the
|
||||
target gateway deployed with `RuntimeDb:EventReadsEnabled=true`** (the C2 SQL event-read workaround).
|
||||
|
||||
### Continuous historization (`ContinuousHistorization` section)
|
||||
|
||||
When `ContinuousHistorization:Enabled=true` **and** `ServerHistorian` is configured, the Host builds a
|
||||
durable, crash-safe **FasterLog** outbox (`FasterLogHistorizationOutbox`) + a gateway-backed
|
||||
`IHistorianValueWriter`, and `WithOtOpcUaRuntimeActors` spawns the `ContinuousHistorizationRecorder`. The
|
||||
recorder taps the per-node dependency-mux value fan-out, appends each numeric value to the outbox (the
|
||||
crash boundary), and drains the outbox to the gateway's SQL live-value write path (**`WriteLiveValues`**)
|
||||
with exponential backoff. The gateway connection is sourced from `ServerHistorian`; this section carries
|
||||
only the recorder + outbox knobs:
|
||||
|
||||
| Key | Default | Notes |
|
||||
|---|---|---|
|
||||
| `Enabled` | `false` | `true` (with `ServerHistorian` configured) wires + spawns the recorder |
|
||||
| `OutboxPath` | `""` (required when enabled) | **Directory** holding the FasterLog segment + commit files. In production set an **absolute** path on durable storage |
|
||||
| `CommitMode` | `PerEntry` | `PerEntry` = fsync before each append returns (no loss window); `Periodic` = batched commits every `CommitIntervalMs` |
|
||||
| `CommitIntervalMs` | `100` | Periodic-mode commit cadence; required `> 0` only under `Periodic` |
|
||||
| `DrainBatchSize` | `64` | Entries peeked + written per drain pass |
|
||||
| `DrainIntervalSeconds` | `2` | Steady drain cadence (and post-success reschedule) |
|
||||
| `Capacity` | `0` | Max un-acked outbox entries before drop-oldest; `0` = unbounded |
|
||||
| `MinBackoffSeconds` | `1` | Initial retry backoff after a failed drain pass |
|
||||
| `MaxBackoffSeconds` | `30` | Cap on the exponential retry backoff |
|
||||
|
||||
### Tag auto-provisioning (`IHistorianProvisioning` EnsureTags hook)
|
||||
|
||||
`AddressSpaceApplier.Apply()` fires a **non-blocking, fire-and-forget** `IHistorianProvisioning.EnsureTagsAsync`
|
||||
hook for added historized value tags — the gateway-backed `GatewayTagProvisioner` calls the gateway's
|
||||
`EnsureTags` so a brand-new historized tag exists in the historian before the recorder's `WriteLiveValues`
|
||||
land. The hook is wrapped so a faulted/synchronously-throwing provisioner can **never** block or fail a
|
||||
deploy. Non-numeric (`String`/`DateTime`/`Reference`) data types are skipped (not provisioned); the
|
||||
recorder likewise drops + meters non-numeric values. Continuous historization is **numeric-analog only** in
|
||||
v1 (`UInt16→UInt4` is a documented fallback).
|
||||
|
||||
### Gateway-side prerequisites
|
||||
|
||||
The target HistorianGateway OtOpcUa points at **must** run with:
|
||||
|
||||
- `RuntimeDb:Enabled=true` — enables the `WriteLiveValues` SQL live path (continuous historization).
|
||||
- `RuntimeDb:EventReadsEnabled=true` — enables `ReadEvents` from `Runtime.dbo.Events` (alarm-history reads).
|
||||
- An API key carrying scopes **`historian:read`**, **`historian:write`**, **`historian:tags:write`**.
|
||||
|
||||
### Migration note (deployments upgrading from the Wonderware backend)
|
||||
|
||||
The `ServerHistorian` section changed shape. Rename the old Wonderware keys and supply the key via env:
|
||||
|
||||
| Old (Wonderware) key | New (gateway) key |
|
||||
|---|---|
|
||||
| `ServerHistorian:Host` + `:Port` | `ServerHistorian:Endpoint` (`https://host:5222`) |
|
||||
| `ServerHistorian:SharedSecret` | `ServerHistorian:ApiKey` (**env `ServerHistorian__ApiKey`**) |
|
||||
| `ServerHistorian:ServerCertThumbprint` | `ServerHistorian:CaCertificatePath` (+ `UseTls` / `AllowUntrustedServerCertificate`) |
|
||||
|
||||
The `AlarmHistorian` section's old Wonderware connection keys (`Host`/`Port`/`UseTls`/`ServerCertThumbprint`/`SharedSecret`)
|
||||
were pruned — remove them; the SQLite knobs are retained and the downstream connection now comes from
|
||||
`ServerHistorian`. See `docs/Historian.md` for the full guide.
|
||||
|
||||
### KNOWN LIMITATION 1 — live-validation gate (do before merging/trusting the cutover)
|
||||
|
||||
The cutover is code-complete but **must be live-validated against a real gateway** (VPN to
|
||||
`wonder-sql-vd03`, gateway running the prerequisites above) before it is merged or trusted. Run the
|
||||
env-gated suite:
|
||||
|
||||
```bash
|
||||
export HISTGW_GATEWAY_ENDPOINT=https://wonder-sql-vd03:5222 # absolute gateway URI; absent ⇒ all live tests skip
|
||||
export HISTGW_GATEWAY_APIKEY=histgw_<id>_<secret> # must carry historian:read + historian:write (+ historian:tags:write) scopes
|
||||
export HISTGW_TEST_TAG=<existing-tag> # read round-trip
|
||||
export HISTGW_WRITE_SANDBOX_TAG=<writable-float-tag> # e.g. HistGW.LiveTest.Sandbox — write round-trip (EnsureTags + write)
|
||||
export HISTGW_ALARM_SOURCE=<source-name> # alarm SendEvent → ReadEvents round-trip
|
||||
dotnet test --filter "Category=LiveIntegration"
|
||||
```
|
||||
|
||||
The live suite **skips cleanly** when these env vars are absent (safe to run offline on macOS). It is the
|
||||
gate the operator runs on the VPN before trusting the cutover.
|
||||
|
||||
### KNOWN LIMITATION 2 — continuous-historization value-capture is not yet live
|
||||
|
||||
The `ContinuousHistorizationRecorder` is fully wired (actor + FasterLog outbox + gateway value-writer +
|
||||
meters) but is currently spawned with an **EMPTY historized-ref set** (`Array.Empty<string>()` in
|
||||
`WithOtOpcUaRuntimeActors`): the deployed address space — and thus the set of historized tag refs — is built
|
||||
later at deploy time, not at actor-spawn time, so there is no clean ref set to resolve at wiring time. With
|
||||
an empty set the recorder **registers interest in nothing and historizes nothing**. **Reads and alarm-writes
|
||||
work today**; the recorder's value-capture is the remaining gap, blocked on a `SetHistorizedRefs`-style feed
|
||||
driven off the deployed composition (a tracked follow-on). Until that feed lands, continuous historization
|
||||
records no values.
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
<PackageVersion Include="Akka.Cluster.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Cluster.Tools" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Logger.Serilog" Version="1.5.60" />
|
||||
<PackageVersion Include="Akka.Remote" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Remote.Hosting" Version="1.5.62" />
|
||||
<PackageVersion Include="Akka.Streams" Version="1.5.62" />
|
||||
@@ -55,6 +56,7 @@
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.InMemory" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.FASTER.Core" Version="2.6.5" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.Configuration.Json" Version="10.0.7" />
|
||||
<PackageVersion Include="Microsoft.Extensions.DependencyInjection" Version="10.0.7" />
|
||||
@@ -119,5 +121,7 @@
|
||||
<PackageVersion Include="ZB.MOM.WW.Auth.AspNetCore" Version="0.1.1" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Audit" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Theme" Version="0.3.1" />
|
||||
<PackageVersion Include="ZB.MOM.WW.HistorianGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.HistorianGateway.Contracts" Version="0.1.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -23,6 +23,8 @@
|
||||
<package pattern="ZB.MOM.WW.Auth.*" />
|
||||
<package pattern="ZB.MOM.WW.Audit" />
|
||||
<package pattern="ZB.MOM.WW.Theme" />
|
||||
<package pattern="ZB.MOM.WW.HistorianGateway.Contracts" />
|
||||
<package pattern="ZB.MOM.WW.HistorianGateway.Client" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
</configuration>
|
||||
|
||||
@@ -31,7 +31,7 @@ Galaxy is the only driver with an external runtime: it speaks gRPC to a separate
|
||||
- .NET 10 SDK (server, drivers, clients all target .NET 10)
|
||||
- SQL Server reachable for the central config DB
|
||||
- For Galaxy specifically: a running `mxaccessgw` deployment — see [docs/v2/Galaxy.ParityRig.md](docs/v2/Galaxy.ParityRig.md)
|
||||
- For Wonderware Historian read-back: optional `OtOpcUaWonderwareHistorian` sidecar — see [docs/ServiceHosting.md](docs/ServiceHosting.md)
|
||||
- For historian read-back / alarm history / continuous historization: a running [`ZB.MOM.WW.HistorianGateway`](docs/Historian.md) deployment (the sole historian backend; consumed as the `ZB.MOM.WW.HistorianGateway.Client` gRPC package). The gateway must run with `RuntimeDb:Enabled=true` + `RuntimeDb:EventReadsEnabled=true`, and the API key must carry the `historian:read` + `historian:write` + `historian:tags:write` scopes.
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -48,7 +48,7 @@ The server starts on `opc.tcp://localhost:4840` with the `None` security profile
|
||||
|
||||
## Install as Windows Services
|
||||
|
||||
Production deployment is driven by `scripts/install/Install-Services.ps1`, which registers the `OtOpcUa` server service (and optionally the `OtOpcUaWonderwareHistorian` sidecar) under a chosen service account. Galaxy support requires a separately installed `mxaccessgw` — neither this repo nor the install script provisions it.
|
||||
Production deployment is driven by `scripts/install/Install-Services.ps1`, which registers the `OtOpcUa` server service under a chosen service account. Historian support requires a separately deployed `ZB.MOM.WW.HistorianGateway` and Galaxy support a separately installed `mxaccessgw` — neither this repo nor the install script provisions them.
|
||||
|
||||
```powershell
|
||||
.\scripts\install\Install-Services.ps1 `
|
||||
@@ -56,7 +56,7 @@ Production deployment is driven by `scripts/install/Install-Services.ps1`, which
|
||||
-ServiceAccount 'DOMAIN\svc-otopcua'
|
||||
```
|
||||
|
||||
Add `-InstallWonderwareHistorian` for the historian sidecar. See the script header and [docs/ServiceHosting.md](docs/ServiceHosting.md) for full options.
|
||||
The historian backend is the external `ZB.MOM.WW.HistorianGateway` (not installed by this script). See the script header and [docs/ServiceHosting.md](docs/ServiceHosting.md) for full options.
|
||||
|
||||
## Client CLI
|
||||
|
||||
@@ -80,7 +80,7 @@ See [docs/Client.CLI.md](docs/Client.CLI.md) and [docs/Client.UI.md](docs/Client
|
||||
| Address space layout | [docs/AddressSpace.md](docs/AddressSpace.md) |
|
||||
| Read / Write dispatch (driver vs virtual vs scripted-alarm) | [docs/ReadWriteOperations.md](docs/ReadWriteOperations.md) |
|
||||
| Incremental sync (driver-backend rediscovery + config publishes) | [docs/IncrementalSync.md](docs/IncrementalSync.md) |
|
||||
| Service hosting (Server + Admin + optional historian sidecar) | [docs/ServiceHosting.md](docs/ServiceHosting.md) |
|
||||
| Service hosting (Server + Admin; external HistorianGateway backend) | [docs/ServiceHosting.md](docs/ServiceHosting.md) |
|
||||
| Security (transport, LDAP, certificates) | [docs/security.md](docs/security.md) |
|
||||
| Redundancy | [docs/Redundancy.md](docs/Redundancy.md) |
|
||||
| Status dashboard | [docs/StatusDashboard.md](docs/StatusDashboard.md) |
|
||||
|
||||
@@ -24,9 +24,7 @@
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ZB.MOM.WW.OtOpcUa.Driver.Modbus.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing.csproj" />
|
||||
<Project Path="src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Contracts.csproj" />
|
||||
@@ -85,8 +83,7 @@
|
||||
<Folder Name="/tests/Drivers/">
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.Tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Browser.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing.Tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Addressing.Tests.csproj" />
|
||||
<Project Path="tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus.IntegrationTests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.IntegrationTests.csproj" />
|
||||
|
||||
+27
-28
@@ -16,7 +16,7 @@ and [ServiceHosting.md](ServiceHosting.md).
|
||||
## Why store-and-forward
|
||||
|
||||
Scripted alarms (and any future non-Galaxy `IAlarmSource`, e.g. AB CIP ALMD)
|
||||
must reach AVEVA Historian, but the historian sidecar can be slow, busy, or
|
||||
must reach AVEVA Historian, but the historian gateway can be slow, busy, or
|
||||
disconnected. The sink decouples the alarm engine from historian reachability:
|
||||
every qualifying transition is committed to a **local SQLite queue first**, and
|
||||
a background drain worker forwards rows to the historian on a backoff-aware
|
||||
@@ -52,8 +52,8 @@ unless noted.
|
||||
`TimestampUtc`.
|
||||
- **`IAlarmHistorianWriter`** — what the drain worker delegates writes to.
|
||||
`WriteBatchAsync(batch, ct)` returns one `HistorianWriteOutcome` per event,
|
||||
in order. Production binds this to `WonderwareHistorianClient` (the AVEVA
|
||||
Historian sidecar IPC client).
|
||||
in order. Production binds this to `GatewayAlarmHistorianWriter` (the
|
||||
HistorianGateway `SendEvent` path).
|
||||
- **`HistorianWriteOutcome`** — per-event drain result: `Ack` (persisted,
|
||||
remove from queue), `RetryPlease` (transient failure — leave queued, retry
|
||||
after backoff), `PermanentFail` (malformed/unrecoverable — move to
|
||||
@@ -160,9 +160,9 @@ node whose `RedundancyRole` is `Primary` historizes, giving exactly-once
|
||||
writes across a redundant pair. `AlarmTransitionEvent` carries `AlarmTypeName`
|
||||
(the Part 9 subtype string) and `Comment` (the operator comment from the
|
||||
originating ack/shelve command) that populate the corresponding fields of
|
||||
`AlarmHistorianEvent`. `WonderwareHistorianClient` is the `IAlarmHistorianWriter`
|
||||
the drain worker delegates to. See [ServiceHosting.md](ServiceHosting.md) for
|
||||
the sidecar setup.
|
||||
`AlarmHistorianEvent`. `GatewayAlarmHistorianWriter` is the `IAlarmHistorianWriter`
|
||||
the drain worker delegates to (the gateway `SendEvent` path). See
|
||||
[ServiceHosting.md](ServiceHosting.md) for the (external) HistorianGateway setup.
|
||||
|
||||
**Scope:** scripted alarms only. Galaxy-native alarms historize via System
|
||||
Platform's `HistorizeToAveva` toggle (not this actor); AB CIP ALMD is not on
|
||||
@@ -174,40 +174,40 @@ The real sink is opt-in via the `AlarmHistorian` section of `appsettings.json`.
|
||||
When `Enabled` is `false` (the default), `AddAlarmHistorian` registers
|
||||
`NullAlarmHistorianSink` and the feature is dormant. When `Enabled` is `true`,
|
||||
`AddAlarmHistorian` constructs `SqliteStoreAndForwardSink` and registers
|
||||
`WonderwareHistorianClient` as the `IAlarmHistorianWriter`.
|
||||
`GatewayAlarmHistorianWriter` as the `IAlarmHistorianWriter`. This section carries
|
||||
**only** the `Enabled` gate + the SQLite store-and-forward knobs — the downstream
|
||||
gateway connection (endpoint / key / TLS) is sourced from the `ServerHistorian`
|
||||
section (see [Historian.md](Historian.md)).
|
||||
|
||||
```json
|
||||
{
|
||||
"AlarmHistorian": {
|
||||
"Enabled": true,
|
||||
"DatabasePath": "C:\\ProgramData\\OtOpcUa\\alarmhistorian.db",
|
||||
"SharedSecret": "<token from historian sidecar config>",
|
||||
"BatchSize": 100
|
||||
},
|
||||
"Historian": {
|
||||
"Wonderware": {
|
||||
"Host": "localhost",
|
||||
"Port": 32569,
|
||||
"UseTls": false,
|
||||
"ServerCertThumbprint": ""
|
||||
}
|
||||
"BatchSize": 100,
|
||||
"DrainIntervalSeconds": 5,
|
||||
"Capacity": 1000000,
|
||||
"DeadLetterRetentionDays": 30
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `Enabled` | bool | `false` | Enable the real SQLite + Wonderware sink. `false` → `NullAlarmHistorianSink`. |
|
||||
| `DatabasePath` | string | — | Absolute path to the SQLite queue file. Created on first use (WAL mode). Required when `Enabled`. |
|
||||
| `SharedSecret` | string | — | Shared secret token the sidecar expects on every connection. Required when `Enabled`. |
|
||||
| `Enabled` | bool | `false` | Enable the SQLite store-and-forward sink (drains to the HistorianGateway `SendEvent` path). `false` → `NullAlarmHistorianSink`. |
|
||||
| `DatabasePath` | string | `alarm-historian.db` | Path to the SQLite queue file. Created on first use (WAL mode). Set an **absolute** path in production. |
|
||||
| `BatchSize` | int | `100` | Max rows per drain cycle handed to `IAlarmHistorianWriter.WriteBatchAsync`. |
|
||||
| `DrainIntervalSeconds` | int | `5` | Seconds between drain-worker ticks. |
|
||||
| `Capacity` | long | `1000000` | Max queued rows before the sink evicts the oldest (data-loss signal via `EvictedCount`). |
|
||||
| `DeadLetterRetentionDays` | int | `30` | Days to retain dead-lettered rows before purge. |
|
||||
| `MaxAttempts` | int | `10` | Maximum delivery attempts before a poison (perpetually-retrying) row is dead-lettered automatically. Must be > 0. |
|
||||
| `AlarmHistorian:Host` | string | `localhost` | DNS name or IP of the machine running the historian sidecar. |
|
||||
| `AlarmHistorian:Port` | int | `32569` | TCP port the sidecar listens on (`OTOPCUA_HISTORIAN_TCP_PORT`). |
|
||||
| `AlarmHistorian:UseTls` | bool | `false` | Wrap the TCP stream in TLS before the Hello handshake. |
|
||||
| `AlarmHistorian:ServerCertThumbprint` | string | — | Optional SHA-1 thumbprint to pin the sidecar's TLS server certificate. Leave empty to use normal CA-chain validation. |
|
||||
|
||||
> Dev and docker-dev deployments leave `Enabled` unset (defaults to `false`) so alarm transitions historize to nowhere unless a historian sidecar is present.
|
||||
> The downstream gateway connection lives in `ServerHistorian` (`Endpoint` + env `ServerHistorian__ApiKey`,
|
||||
> `UseTls`, `CaCertificatePath`); alarm-history `ReadEvents` additionally requires the gateway running
|
||||
> `RuntimeDb:EventReadsEnabled=true`. The old Wonderware connection keys (`SharedSecret` /
|
||||
> `AlarmHistorian:Host`/`Port`/`UseTls`/`ServerCertThumbprint`) were pruned.
|
||||
|
||||
> Dev and docker-dev deployments leave `Enabled` unset (defaults to `false`) so alarm transitions historize to nowhere unless a HistorianGateway is configured.
|
||||
|
||||
---
|
||||
|
||||
@@ -217,8 +217,7 @@ When `Enabled` is `false` (the default), `AddAlarmHistorian` registers
|
||||
Part 9 surface; which alarms route to this sink.
|
||||
- [DriverLifecycle.md](DriverLifecycle.md) — `IHistorianDataSource` (the
|
||||
historian *read* surface; this page covers the *write* path) and the
|
||||
`WonderwareHistorianClient`.
|
||||
`GatewayHistorianDataSource`.
|
||||
- [ScriptedAlarms.md](ScriptedAlarms.md) — the scripted-alarm engine that emits
|
||||
most events into this sink.
|
||||
- [ServiceHosting.md](ServiceHosting.md) — the optional Wonderware historian
|
||||
sidecar.
|
||||
- [ServiceHosting.md](ServiceHosting.md) — the external HistorianGateway backend.
|
||||
|
||||
@@ -203,14 +203,14 @@ Under warm/hot redundancy, both cluster nodes run `ScriptedAlarmHostActor` and t
|
||||
## Historian write-back (non-Galaxy alarms)
|
||||
|
||||
Scripted alarms (and any future non-Galaxy `IAlarmSource` like
|
||||
AB CIP ALMD) route to AVEVA Historian via the Wonderware sidecar:
|
||||
AB CIP ALMD) route to AVEVA Historian via the HistorianGateway:
|
||||
|
||||
- `IAlarmHistorianSink` is the DI-registered intake contract. The
|
||||
default binding is `NullAlarmHistorianSink` (registered in
|
||||
`ServiceCollectionExtensions.AddOtOpcUaRuntime`). Production
|
||||
deployments override it with `SqliteStoreAndForwardSink` wrapping
|
||||
`WonderwareHistorianClient` (the AVEVA Historian sidecar IPC client)
|
||||
— see [ServiceHosting.md](ServiceHosting.md) for the sidecar setup.
|
||||
`GatewayAlarmHistorianWriter` (the HistorianGateway `SendEvent` path)
|
||||
— see [ServiceHosting.md](ServiceHosting.md) for the HistorianGateway setup.
|
||||
- `SqliteStoreAndForwardSink` queues each transition to a local
|
||||
SQLite database and drains in the background via an
|
||||
`IAlarmHistorianWriter`. **The durability guarantee is bounded**: the
|
||||
|
||||
+1
-1
@@ -189,7 +189,7 @@ The alarm subscription source node is saved and restored on reconnection with au
|
||||
|
||||

|
||||
|
||||
Read historical data from the Wonderware Historian.
|
||||
Read historical data from the historian (served server-side by the HistorianGateway backend).
|
||||
|
||||
### Time Range
|
||||
|
||||
|
||||
+19
-22
@@ -119,11 +119,21 @@ The Galaxy/MxAccess connection settings are **not an `appsettings` section.** Th
|
||||
|
||||
> The `OTOPCUA_GALAXY_*` environment variables that v1's in-process `Galaxy.Host` consumed **no longer live in this repo** — they moved into the separately-installed mxaccessgw gateway's own config (see the v1 archive pointer in `docs/README.md` and the Galaxy overview at [`docs/drivers/Galaxy.md`](drivers/Galaxy.md)). The only Galaxy connection secret this repo touches is the gateway API key via `ApiKeySecretRef` above.
|
||||
|
||||
### Historian config (TCP sidecar)
|
||||
### Historian config (HistorianGateway)
|
||||
|
||||
The Wonderware Historian sidecar (`OtOpcUaWonderwareHistorian`) is an independent Windows service that the OtOpcUa host connects to over TCP. It is **not** spawned as a child process by the host — the two services are started independently (e.g. by NSSM / `sc.exe`). The sidecar entry point (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`) reads its configuration from environment variables; the OtOpcUa host side reads the `AlarmHistorian` appsettings section. See the `OTOPCUA_HISTORIAN_*` rows in the environment-variable table below.
|
||||
The historian backend is the external **`ZB.MOM.WW.HistorianGateway`** sidecar, consumed as the
|
||||
`ZB.MOM.WW.HistorianGateway.Client` gRPC package (the retired Wonderware TCP sidecar is documented at
|
||||
[`docs/drivers/Historian.Wonderware.md`](drivers/Historian.Wonderware.md)). The OtOpcUa host reads three
|
||||
appsettings sections — `ServerHistorian` (read path + gateway connection), `ContinuousHistorization`
|
||||
(FasterLog outbox + recorder draining to `WriteLiveValues`), and `AlarmHistorian` (SQLite store-and-forward
|
||||
alarm sink draining to `SendEvent`). The gateway connection (endpoint / key / TLS) lives **only** in
|
||||
`ServerHistorian`; the other two sections source it from there.
|
||||
|
||||
The in-process **client-side** options POCO is `WonderwareHistorianClientOptions` (`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/WonderwareHistorianClientOptions.cs`), bound from the `AlarmHistorian` section: `Host`, `Port`, `UseTls`, `ServerCertThumbprint`, `SharedSecret`, `ConnectTimeout` (default 10s), `CallTimeout` (default 30s), `ProbeTimeoutSeconds` (`15`).
|
||||
The gateway API key is supplied via the environment variable **`ServerHistorian__ApiKey`** — never committed
|
||||
to config. The target gateway must run with `RuntimeDb:Enabled=true` + `RuntimeDb:EventReadsEnabled=true`, and the
|
||||
key must carry the scopes `historian:read`, `historian:write`, `historian:tags:write`. See
|
||||
[`docs/Historian.md`](Historian.md) for the full key reference, the migration note (old Wonderware keys →
|
||||
gateway keys), and the deployment prerequisites.
|
||||
|
||||
---
|
||||
|
||||
@@ -139,29 +149,16 @@ All names are read in this repo's source via `Environment.GetEnvironmentVariable
|
||||
| `OTOPCUA_CONFIG_CONNECTION` | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/DesignTimeDbContextFactory.cs` (design-time / `dotnet ef` only) | Read at **design time** by `DesignTimeDbContextFactory.cs` for `dotnet ef` migrations. At **runtime** the server resolves the connection string from `ConnectionStrings:ConfigDb` (env form: `ConnectionStrings__ConfigDb`) via `configuration.GetConnectionString("ConfigDb")` in `ServiceCollectionExtensions.cs` — `OTOPCUA_CONFIG_CONNECTION` appears there only as a hint in an error message, not via `GetEnvironmentVariable`. No credential is embedded in source. |
|
||||
| `ASPNETCORE_ENVIRONMENT` | ASP.NET host builder (framework) | Selects `appsettings.{Environment}.json` (e.g. `Development`). |
|
||||
|
||||
### Historian sidecar (`OTOPCUA_HISTORIAN_*`)
|
||||
### Historian (`ServerHistorian__ApiKey`)
|
||||
|
||||
All read in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`.
|
||||
The retired Wonderware sidecar's `OTOPCUA_HISTORIAN_*` environment variables are **gone** — no source reads
|
||||
them anymore. The historian backend is now the external HistorianGateway, configured through the
|
||||
`ServerHistorian` / `ContinuousHistorization` / `AlarmHistorian` appsettings sections (above). The single
|
||||
historian secret this repo reads from the environment is the gateway API key:
|
||||
|
||||
| Variable | Effect / default |
|
||||
|---|---|
|
||||
| `OTOPCUA_HISTORIAN_TCP_PORT` | TCP port the sidecar listens on. Default `32569`. Corresponds to `AlarmHistorian:Port` on the host side. |
|
||||
| `OTOPCUA_HISTORIAN_BIND` | TCP bind address for the sidecar. Default `0.0.0.0`. |
|
||||
| `OTOPCUA_HISTORIAN_TLS_ENABLED` | `true` enables TLS on the sidecar's TCP listener. Default `false`. Corresponds to `AlarmHistorian:UseTls` on the host side. |
|
||||
| `OTOPCUA_HISTORIAN_TLS_CERT` | PFX file path **or** `LocalMachine\My\<thumbprint>` to load the sidecar TLS server certificate from the machine store. |
|
||||
| `OTOPCUA_HISTORIAN_TLS_CERT_PASSWORD` | Password for a PFX-file certificate. Omit when using a machine-store cert. Never commit a value. |
|
||||
| `OTOPCUA_HISTORIAN_SECRET` | Per-process shared secret verified in the TCP Hello frame. Required (throws if unset). Corresponds to `AlarmHistorian:SharedSecret` on the host side. |
|
||||
| `OTOPCUA_HISTORIAN_ENABLED` | `true` opens the real Wonderware SDK connection; anything else → pipe-only mode (smoke/IPC tests). Default: not-true → pipe-only. |
|
||||
| `OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED` | `false` disables the alarm-event writer (sidecar rejects `WriteAlarmEvents`). Default `true` (when `ENABLED=true`). |
|
||||
| `OTOPCUA_HISTORIAN_INTEGRATED` | `false` → SQL auth (use `USER`/`PASS`); any other value → integrated security. Default: integrated. |
|
||||
| `OTOPCUA_HISTORIAN_SERVER` | Historian server hostname. Default `localhost`. |
|
||||
| `OTOPCUA_HISTORIAN_SERVERS` | Comma-separated multi-node server list (overrides single `SERVER` when set). |
|
||||
| `OTOPCUA_HISTORIAN_PORT` | Historian port. Default `32568`. |
|
||||
| `OTOPCUA_HISTORIAN_USER` | SQL username (when not integrated). |
|
||||
| `OTOPCUA_HISTORIAN_PASS` | SQL password (when not integrated). Never commit a value. |
|
||||
| `OTOPCUA_HISTORIAN_TIMEOUT_SEC` | Command timeout (seconds). Default `30`. |
|
||||
| `OTOPCUA_HISTORIAN_MAX_VALUES` | Max values returned per read. Default `10000`. |
|
||||
| `OTOPCUA_HISTORIAN_COOLDOWN_SEC` | Failure cooldown (seconds). Default `60`. |
|
||||
| `ServerHistorian__ApiKey` | The HistorianGateway peppered-HMAC key (`histgw_<id>_<secret>`) sent as `Authorization: Bearer`. Supply via environment — **never commit**. Required when `ServerHistorian:Enabled=true`. |
|
||||
|
||||
### Driver integration-test / fixture sim endpoints
|
||||
|
||||
|
||||
+15
-16
@@ -89,8 +89,9 @@ Members:
|
||||
Implementations: every driver ships a `*DriverProbe` in its driver project
|
||||
(e.g.
|
||||
[`Driver.Modbus/ModbusDriverProbe.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverProbe.cs)
|
||||
does a bare socket open/close), plus the Wonderware historian's
|
||||
`WonderwareHistorianDriverProbe`.
|
||||
does a bare socket open/close). The historian backend is the external
|
||||
HistorianGateway (consumed as a gRPC client package, not an `IDriver`), so it
|
||||
has no driver probe.
|
||||
|
||||
Flow: the AdminUI's `AdminProbeService`
|
||||
([`AdminUI/Clients/AdminProbeService.cs`](../src/Server/ZB.MOM.WW.OtOpcUa.AdminUI/Clients/AdminProbeService.cs))
|
||||
@@ -203,8 +204,8 @@ lifecycle. This is distinct from the driver capability `IHistoryProvider`:
|
||||
- `IHistoryProvider` is a *driver capability* — the server dispatches to it via
|
||||
the driver instance.
|
||||
- `IHistorianDataSource` is a *server registration* — the server resolves it by
|
||||
namespace and calls it directly, so one historian (e.g. Wonderware) can serve
|
||||
many drivers' nodes, and drivers can restart without dropping history
|
||||
namespace and calls it directly, so one historian (the HistorianGateway) can
|
||||
serve many drivers' nodes, and drivers can restart without dropping history
|
||||
availability.
|
||||
|
||||
The interface is `: IDisposable` and declares the full read surface as
|
||||
@@ -230,20 +231,18 @@ All values use the shared `DataValueSnapshot` / `HistoricalEvent` shapes;
|
||||
backend-specific quality/type encodings are translated to OPC UA `StatusCode`
|
||||
uints inside the data source.
|
||||
|
||||
Implementations:
|
||||
Implementation:
|
||||
|
||||
- `WonderwareHistorianClient`
|
||||
([`Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/WonderwareHistorianClient.cs))
|
||||
— the .NET 10 client that talks to the Wonderware historian sidecar over
|
||||
TCP (optional TLS). It implements both `IHistorianDataSource` (read paths) and
|
||||
`IAlarmHistorianWriter` (the alarm-event drain target; see
|
||||
- `GatewayHistorianDataSource`
|
||||
([`Driver.Historian.Gateway/GatewayHistorianDataSource.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway/GatewayHistorianDataSource.cs))
|
||||
— the read backend that talks gRPC to the external `ZB.MOM.WW.HistorianGateway`
|
||||
(via the `ZB.MOM.WW.HistorianGateway.Client` package, behind the
|
||||
`IHistorianGatewayClient` seam). The alarm-event drain target is the separate
|
||||
`GatewayAlarmHistorianWriter` (the gateway `SendEvent` path; see
|
||||
[AlarmHistorian.md](AlarmHistorian.md)).
|
||||
- `HistorianDataSource`
|
||||
([`Driver.Historian.Wonderware/Backend/HistorianDataSource.cs`](../src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Backend/HistorianDataSource.cs))
|
||||
— the in-process backend implementation behind the sidecar.
|
||||
|
||||
The optional Wonderware historian sidecar setup is described in
|
||||
[ServiceHosting.md](ServiceHosting.md).
|
||||
The HistorianGateway is the sole historian backend; its config keys and
|
||||
deployment prerequisites are in [Historian.md](Historian.md).
|
||||
|
||||
---
|
||||
|
||||
@@ -288,7 +287,7 @@ Folders:
|
||||
- [ReadWriteOperations.md](ReadWriteOperations.md) — the driver *capability*
|
||||
interfaces (read/write/subscribe) and resilience pipeline.
|
||||
- [ServiceHosting.md](ServiceHosting.md) — role gating, the Akka cluster, and
|
||||
the optional Wonderware historian sidecar.
|
||||
the external HistorianGateway backend.
|
||||
- [AlarmHistorian.md](AlarmHistorian.md) — the store-and-forward SQLite alarm
|
||||
sink that drains to `IAlarmHistorianWriter`.
|
||||
- [Redundancy.md](Redundancy.md) — driver stability tiers in the redundancy
|
||||
|
||||
+60
-50
@@ -3,9 +3,12 @@
|
||||
Phase C wires server-side OPC UA **HistoryRead** for authored equipment tags flagged
|
||||
historized. The feature is driver-agnostic: any equipment tag (Galaxy, Modbus, OpcUaClient,
|
||||
or any other driver) can be marked historized; the server dispatches all history reads to the
|
||||
registered `IHistorianDataSource` — today, the Wonderware sidecar client
|
||||
(`WonderwareHistorianClient`). No EF migration is required; the historian flag rides in the
|
||||
existing schemaless `TagConfig` JSON blob alongside the Phase B `alarm` object.
|
||||
registered `IHistorianDataSource` — the **HistorianGateway** read client
|
||||
(`GatewayHistorianDataSource`, talking gRPC to the external `ZB.MOM.WW.HistorianGateway` via the
|
||||
`ZB.MOM.WW.HistorianGateway.Client` package). No EF migration is required; the historian flag rides in
|
||||
the existing schemaless `TagConfig` JSON blob alongside the Phase B `alarm` object. (The bespoke
|
||||
Wonderware TCP sidecar backend this replaced was retired — see
|
||||
[drivers/Historian.Wonderware.md](drivers/Historian.Wonderware.md).)
|
||||
|
||||
Design reference: [docs/plans/2026-06-14-galaxy-phase-c-historian-design.md](plans/2026-06-14-galaxy-phase-c-historian-design.md).
|
||||
|
||||
@@ -60,11 +63,12 @@ and all HistoryRead calls on historized nodes return `GoodNoData` (empty, not an
|
||||
{
|
||||
"ServerHistorian": {
|
||||
"Enabled": false,
|
||||
"Host": "localhost",
|
||||
"Port": 32569,
|
||||
"UseTls": false,
|
||||
"ServerCertThumbprint": "",
|
||||
"SharedSecret": "",
|
||||
"Endpoint": "",
|
||||
"ApiKey": "",
|
||||
"UseTls": true,
|
||||
"AllowUntrustedServerCertificate": false,
|
||||
"CaCertificatePath": null,
|
||||
"CallTimeout": "00:00:30",
|
||||
"MaxTieClusterOverfetch": 65536
|
||||
}
|
||||
}
|
||||
@@ -72,20 +76,31 @@ and all HistoryRead calls on historized nodes return `GoodNoData` (empty, not an
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `Enabled` | bool | `false` | Enable the live `WonderwareHistorianClient`. `false` → `NullHistorianDataSource` (empty reads). |
|
||||
| `Host` | string | `localhost` | DNS name or IP of the machine running the historian sidecar. |
|
||||
| `Port` | int | `32569` | TCP port the sidecar listens on (`OTOPCUA_HISTORIAN_TCP_PORT`). |
|
||||
| `UseTls` | bool | `false` | Wrap the TCP connection in TLS. |
|
||||
| `ServerCertThumbprint` | string | — | Optional SHA-1 thumbprint to pin the sidecar's TLS certificate. Leave empty for CA-chain validation. |
|
||||
| `SharedSecret` | string | — | Shared secret token the sidecar expects on every connection. Required when `Enabled`. |
|
||||
| `Enabled` | bool | `false` | Enable the live `GatewayHistorianDataSource`. `false` → `NullHistorianDataSource` (empty reads). |
|
||||
| `Endpoint` | string | `""` | Absolute gateway URI, e.g. `https://host:5222`. Scheme selects transport (`https://` = TLS, `http://` = h2c plaintext). Required when `Enabled`. |
|
||||
| `ApiKey` | string | `""` | The gateway peppered-HMAC key (`histgw_<id>_<secret>`) sent as `Authorization: Bearer`. Required when `Enabled`. **Supply via env `ServerHistorian__ApiKey`.** |
|
||||
| `UseTls` | bool | `true` | Connect over TLS; must match the `Endpoint` scheme. |
|
||||
| `AllowUntrustedServerCertificate` | bool | `false` | Accept a self-signed / untrusted server certificate (dev / on-prem only). |
|
||||
| `CaCertificatePath` | string\|null | `null` | PEM CA file pinning the gateway's TLS chain. Null/empty uses the OS trust store. |
|
||||
| `CallTimeout` | TimeSpan | `00:00:30` | Per-call deadline applied to each unary gateway read. |
|
||||
| `MaxTieClusterOverfetch` | int | `65536` | Maximum samples the server will fetch in one shot to page through a tie cluster (multiple samples sharing one `SourceTimestamp`). A cluster larger than this ceiling fails with `BadHistoryOperationUnsupported`. Raise to handle abnormally large tie clusters; the default covers all normal-data cases. |
|
||||
|
||||
> **Do not commit `SharedSecret` to `appsettings.json`.** Set it via an environment variable,
|
||||
> a secrets store, or a deployment-time overlay. The checked-in default is always empty.
|
||||
> **Do not commit `ApiKey` to `appsettings.json`.** Set it via the environment variable
|
||||
> `ServerHistorian__ApiKey`, a secrets store, or a deployment-time overlay. The checked-in default is
|
||||
> always empty.
|
||||
|
||||
> **Gateway-side prerequisites.** The target gateway must run with `RuntimeDb:Enabled=true` (continuous
|
||||
> `WriteLiveValues`) + `RuntimeDb:EventReadsEnabled=true` (alarm-history `ReadEvents`), and the API key
|
||||
> must carry the scopes `historian:read`, `historian:write`, `historian:tags:write`.
|
||||
|
||||
> **Migration from the Wonderware backend.** Rename the old keys: `Host`/`Port` → `Endpoint`
|
||||
> (`https://host:5222`); `SharedSecret` → `ApiKey` (env `ServerHistorian__ApiKey`);
|
||||
> `ServerCertThumbprint` → `CaCertificatePath` (+ `UseTls` / `AllowUntrustedServerCertificate`).
|
||||
|
||||
The `ServerHistorian` section is independent of the `AlarmHistorian` section (the alarm
|
||||
write path). They share the same Wonderware sidecar process but hold separate client
|
||||
instances and separate `SharedSecret` values.
|
||||
write path) and the `ContinuousHistorization` section (driver-value capture). All three target the
|
||||
**same** gateway — but only `ServerHistorian` carries the connection (endpoint/key/TLS); the other two
|
||||
source it from there.
|
||||
|
||||
---
|
||||
|
||||
@@ -109,7 +124,8 @@ OPC UA client can discover historized capability from the node's attributes.
|
||||
**Equipment-folder event-notifier nodes** serve Event history. Every equipment folder that
|
||||
owns at least one alarm condition is already an event notifier; the server registers a
|
||||
`sourceName` (the equipment id) for each such folder and maps event history reads to the
|
||||
Wonderware historian using that source. Event-field projection supports the standard
|
||||
HistorianGateway using that source. (Alarm-history `ReadEvents` requires the gateway to run with
|
||||
`RuntimeDb:EventReadsEnabled=true`.) Event-field projection supports the standard
|
||||
`BaseEventType` select clauses — `EventId`, `SourceName`, `Time`, `ReceiveTime`, `Message`,
|
||||
and `Severity`; an unsupported select operand returns a null field (spec-conformant).
|
||||
|
||||
@@ -123,7 +139,7 @@ upstream `HistoryEvent` onto `HistoricalEvent` — the same six-field projection
|
||||
node-manager itself projects when serving event history. This is a **driver-level capability**:
|
||||
the OpcUaClient driver acts as a passthrough to whatever historian the upstream server exposes,
|
||||
and is independent of the single server-side `IHistorianDataSource` backend
|
||||
(`WonderwareHistorianClient` / `NullHistorianDataSource`) that the OtOpcUa node-manager
|
||||
(`GatewayHistorianDataSource` / `NullHistorianDataSource`) that the OtOpcUa node-manager
|
||||
dispatches HistoryRead to for tags on other drivers (Galaxy, Modbus, S7, etc.).
|
||||
|
||||
### Graceful degradation
|
||||
@@ -138,7 +154,7 @@ dispatches HistoryRead to for tags on other drivers (Galaxy, Modbus, S7, etc.).
|
||||
|
||||
A historized node with no historian configured never returns an error status — it returns
|
||||
empty. This means a deployment can author and publish historized tags before the historian
|
||||
sidecar is provisioned, without producing error spikes in connected clients.
|
||||
gateway is provisioned, without producing error spikes in connected clients.
|
||||
|
||||
### Continuation-point paging (Raw)
|
||||
|
||||
@@ -187,22 +203,14 @@ are disposed when the session closes). Resuming an unknown / evicted / released
|
||||
`BadContinuationPointInvalid`. `releaseContinuationPoints` drops the stored cursors without reading
|
||||
data.
|
||||
|
||||
### Total aggregate derivation
|
||||
### Total aggregate
|
||||
|
||||
The OPC UA `Total` aggregate is **supported** over the Wonderware backend. Because the
|
||||
Wonderware `AnalogSummary` query exposes no `Total` column, the value is derived client-side
|
||||
using the time-integral identity:
|
||||
|
||||
> **Total = time-weighted Average × interval-seconds**
|
||||
|
||||
The wire request is issued with the `Average` column; each returned bucket's value is
|
||||
multiplied by `interval.TotalSeconds` before the result is returned to the OPC UA client.
|
||||
Bucket status codes and timestamps are preserved unchanged. Null (unavailable) Average
|
||||
buckets produce a null Total (`BadNoData` downstream) — the scaling is not applied.
|
||||
|
||||
This derivation is exact for piecewise-constant (step) signals. For continuously varying
|
||||
signals it is an approximation identical to the one Wonderware would apply internally, so
|
||||
the result is consistent with what AVEVA Historian reports for the same window.
|
||||
The OPC UA `Total` aggregate is **supported** over the HistorianGateway backend. The gateway exposes a
|
||||
native **`Integral`** retrieval mode, so `Total` maps straight to it (`HistoryAggregateType.Total →
|
||||
RetrievalMode.Integral`) — no client-side scaling. (This replaces the retired Wonderware path, which had no
|
||||
`Total` column and derived it client-side as time-weighted `Average × interval-seconds`.) `Count` is
|
||||
likewise a native gateway mode. Bucket status codes and timestamps are preserved unchanged; empty / null
|
||||
buckets surface as `BadNoData`.
|
||||
|
||||
### Known limitations
|
||||
|
||||
@@ -213,12 +221,12 @@ the result is consistent with what AVEVA Historian reports for the same window.
|
||||
read and there is no "full page ⇒ maybe more" signal to page on. Returning the full result with
|
||||
no continuation point is spec-conformant.
|
||||
- **No modified-value history** (`HistoryReadModified`). Requests for modified values return
|
||||
`BadHistoryOperationUnsupported`. This is **infra-gated, not a server-code gap**: the AVEVA
|
||||
Wonderware historian backend (`IHistorianDataSource`, the TCP sidecar client) exposes only a
|
||||
current-value read path — there is no modified/edited-history surface to source the data from. The
|
||||
server-side override is in place (it cleanly rejects modified reads per node) and `IsReadModified`
|
||||
is honoured; serving real modified-value history is unblocked only once the historian client/sidecar
|
||||
grows a modified-read RPC. Until then, rejecting is the correct, spec-conformant behaviour.
|
||||
`BadHistoryOperationUnsupported`. This is **infra-gated, not a server-code gap**: the HistorianGateway
|
||||
backend (`GatewayHistorianDataSource`) exposes only a current-value read path — there is no
|
||||
modified/edited-history surface to source the data from. The server-side override is in place (it cleanly
|
||||
rejects modified reads per node) and `IsReadModified` is honoured; serving real modified-value history is
|
||||
unblocked only once the gateway grows a modified-read RPC. Until then, rejecting is the correct,
|
||||
spec-conformant behaviour.
|
||||
|
||||
### Redundancy and authorization
|
||||
|
||||
@@ -309,14 +317,16 @@ above), but is not exposed by this bundled CLI.
|
||||
|
||||
## Live /run gate
|
||||
|
||||
The live read gate requires the Wonderware historian sidecar running on the WW Historian VM
|
||||
(`10.100.0.48`) and AVEVA Historian healthy. Set `ServerHistorian:Enabled=true` with the
|
||||
correct `Host`, `Port`, and `SharedSecret` in `appsettings.json` (or via environment
|
||||
variables), then deploy and publish at least one historized Galaxy tag. The gate is
|
||||
operator-driven — it is not part of the local docker-dev rig.
|
||||
The live read gate requires a reachable `ZB.MOM.WW.HistorianGateway` (VPN to `wonder-sql-vd03`) with the
|
||||
AVEVA Historian behind it healthy. Set `ServerHistorian:Enabled=true` with the correct `Endpoint`
|
||||
(`https://host:5222`) and supply `ServerHistorian__ApiKey` via the environment, then deploy and publish at
|
||||
least one historized Galaxy tag. The gate is operator-driven — it is not part of the local docker-dev rig.
|
||||
The gateway-backed driver also ships an env-gated live suite (`Category=LiveIntegration`); see the
|
||||
`HISTGW_GATEWAY_ENDPOINT` / `HISTGW_GATEWAY_APIKEY` / `HISTGW_TEST_TAG` / `HISTGW_WRITE_SANDBOX_TAG` /
|
||||
`HISTGW_ALARM_SOURCE` env vars (it skips cleanly when they are absent).
|
||||
|
||||
See [AlarmHistorian.md](AlarmHistorian.md) for the historian sidecar setup and
|
||||
[ServiceHosting.md](ServiceHosting.md) for the sidecar service configuration.
|
||||
See [AlarmHistorian.md](AlarmHistorian.md) for the alarm write path and
|
||||
[ServiceHosting.md](ServiceHosting.md) for the (external) HistorianGateway deployment.
|
||||
|
||||
---
|
||||
|
||||
@@ -373,7 +383,7 @@ phases and are recorded here so future audits don't re-flag them.
|
||||
## See also
|
||||
|
||||
- [docs/plans/2026-06-14-galaxy-phase-c-historian-design.md](plans/2026-06-14-galaxy-phase-c-historian-design.md) — full design and implementation notes
|
||||
- [AlarmHistorian.md](AlarmHistorian.md) — alarm write path; shares the same Wonderware sidecar
|
||||
- [AlarmHistorian.md](AlarmHistorian.md) — alarm write path; drains to the same HistorianGateway (`SendEvent`)
|
||||
- [AlarmTracking.md](AlarmTracking.md) — OPC UA Part 9 alarm surface (event history source)
|
||||
- [Client.CLI.md](Client.CLI.md) — full `historyread` flag reference
|
||||
- [ScriptedAlarms.md](ScriptedAlarms.md) §"Native driver alarms" — the Phase B `alarm` object in `TagConfig` (parallel carrier)
|
||||
|
||||
+1
-1
@@ -64,7 +64,7 @@ For Modbus / S7 / AB CIP / AB Legacy / TwinCAT / FOCAS / OPC UA Client specifics
|
||||
| [security.md](security.md) | Transport security profiles, LDAP auth, ACL trie, role grants, OTOPCUA0001 analyzer |
|
||||
| [Redundancy.md](Redundancy.md) | `RedundancyCoordinator`, `ServiceLevelCalculator`, apply-lease, Prometheus metrics |
|
||||
| [Reservations.md](Reservations.md) | Fleet-wide ZTag / SAPID external-ID reservations — publish-time claim, release flow |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Single fused `OtOpcUa.Host` binary install/uninstall with `OTOPCUA_ROLES` gating, plus the optional `OtOpcUaWonderwareHistorian` sidecar |
|
||||
| [ServiceHosting.md](ServiceHosting.md) | Single fused `OtOpcUa.Host` binary install/uninstall with `OTOPCUA_ROLES` gating; the historian backend is the external HistorianGateway |
|
||||
| [StatusDashboard.md](StatusDashboard.md) | Pointer — superseded by [v2/admin-ui.md](v2/admin-ui.md) |
|
||||
|
||||
### Client tooling
|
||||
|
||||
+13
-6
@@ -2,14 +2,15 @@
|
||||
|
||||
## Overview
|
||||
|
||||
A production OtOpcUa deployment runs **one binary per node**, plus the optional Wonderware historian sidecar:
|
||||
A production OtOpcUa deployment runs **one binary per node**. The historian backend is the external
|
||||
`ZB.MOM.WW.HistorianGateway`, deployed separately (not installed by this repo's scripts):
|
||||
|
||||
| Process | Project | Runtime | Platform | Responsibility |
|
||||
|---|---|---|---|---|
|
||||
| **OtOpcUa Host** | `src/Server/ZB.MOM.WW.OtOpcUa.Host` | .NET 10 | AnyCPU | Single fused binary. `OTOPCUA_ROLES` env decides what to mount: `admin` (Blazor + auth + control-plane singletons), `driver` (OPC UA endpoint + per-driver actors), or both. |
|
||||
| **OtOpcUa Wonderware Historian** *(optional)* | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware` | .NET Framework 4.8 | x64 (64-bit) | Out-of-process sidecar exposing the Wonderware Historian SDK over TCP (optional TLS). Required only when `AlarmHistorian:Enabled=true`. May run on the same machine or a remote host. |
|
||||
| **ZB.MOM.WW.HistorianGateway** *(external — separate deployment)* | not in this repo | .NET 10 | — | The sole historian backend. OtOpcUa talks gRPC to it (via the `ZB.MOM.WW.HistorianGateway.Client` package) for HistoryRead, alarm `SendEvent`, and continuous `WriteLiveValues`. Must run with `RuntimeDb:Enabled=true` + `RuntimeDb:EventReadsEnabled=true`; the API key must carry `historian:read` + `historian:write` + `historian:tags:write`. |
|
||||
|
||||
Galaxy access still uses the separately-installed **mxaccessgw** sidecar (see `docs/v2/Galaxy.ParityRig.md`); the gateway owns the MXAccess COM bitness constraint (its worker is x86 net48). Nothing in the OtOpcUa repo carries that constraint anymore.
|
||||
Galaxy access still uses the separately-installed **mxaccessgw** sidecar (see `docs/v2/Galaxy.ParityRig.md`); the gateway owns the MXAccess COM bitness constraint (its worker is x86 net48). Nothing in the OtOpcUa repo carries that constraint anymore. (The bespoke Wonderware historian sidecar this deployment used to ship was retired — see [drivers/Historian.Wonderware.md](drivers/Historian.Wonderware.md).)
|
||||
|
||||
> **v2 change.** v1's separate `OtOpcUa.Server` + `OtOpcUa.Admin` Windows services merged into a single role-gated `OtOpcUa.Host` binary. Two installers became one (with a `-Roles` parameter). The whole DI graph is composed in `OtOpcUa.Host/Program.cs`; per-role wiring is conditional on the env var.
|
||||
|
||||
@@ -72,14 +73,20 @@ Both admin and driver nodes expose:
|
||||
|
||||
Used by Traefik for the active-leader-only routing pattern (see [Architecture-v2.md](v2/Architecture-v2.md)).
|
||||
|
||||
## OtOpcUa Wonderware Historian (optional)
|
||||
## Historian backend (HistorianGateway — external)
|
||||
|
||||
IPC contract types live in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/`; sidecar TCP server in `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Ipc/`. The sidecar listens on TCP port 32569 by default; `Install-Services.ps1 -InstallWonderwareHistorian` adds the Windows Firewall inbound rule. The host and sidecar may run on different machines — configure `AlarmHistorian:Host` + `AlarmHistorian:Port` (and optionally `AlarmHistorian:UseTls`) on the OtOpcUa host side. See [Historian.Wonderware.md](drivers/Historian.Wonderware.md) for the full transport and security reference.
|
||||
The historian backend is the external `ZB.MOM.WW.HistorianGateway`, deployed and operated separately (not
|
||||
installed by `Install-Services.ps1`). OtOpcUa connects to it over gRPC via the
|
||||
`ZB.MOM.WW.HistorianGateway.Client` package — configure the `ServerHistorian:Endpoint` (`https://host:5222`)
|
||||
and supply `ServerHistorian__ApiKey` via the environment on the OtOpcUa host side. The gateway must run with
|
||||
`RuntimeDb:Enabled=true` + `RuntimeDb:EventReadsEnabled=true` and an API key carrying `historian:read` +
|
||||
`historian:write` + `historian:tags:write`. See [Historian.md](Historian.md) for the full config-key and
|
||||
deployment-prerequisite reference. (The retired Wonderware TCP sidecar: [Historian.Wonderware.md](drivers/Historian.Wonderware.md).)
|
||||
|
||||
## Install / Uninstall
|
||||
|
||||
- `scripts/install/Install-Services.ps1 -Roles admin,driver` — installs `OtOpcUaHost`.
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes the host service (and the historian sidecar if installed).
|
||||
- `scripts/install/Uninstall-Services.ps1` — stops + removes the host service. (The historian backend is the external HistorianGateway — not installed/removed by these scripts.)
|
||||
|
||||
## Logging
|
||||
|
||||
|
||||
+9
-8
@@ -120,14 +120,15 @@ drivers:
|
||||
| TwinCAT | Symbol path, data type, etc. |
|
||||
| FOCAS | PMC address, data type, etc. |
|
||||
| **OpcUaClient** | `FullName` (the remote OPC UA node id string) |
|
||||
| **Historian.Wonderware** | `FullName` (the Wonderware tagname to read) |
|
||||
|
||||
**OpcUaClient** and **Historian.Wonderware** were previously raw-JSON
|
||||
fallback only; they now have first-class typed editors that expose a single
|
||||
`FullName` field (PascalCase JSON key, consistent with the Galaxy editor
|
||||
convention). Both are registered in `TagConfigEditorMap` and
|
||||
`TagConfigValidator`; unknown keys in the stored JSON blob are preserved on
|
||||
round-trip.
|
||||
**OpcUaClient** was previously raw-JSON fallback only; it now has a first-class
|
||||
typed editor that exposes a single `FullName` field (PascalCase JSON key,
|
||||
consistent with the Galaxy editor convention). It is registered in
|
||||
`TagConfigEditorMap` and `TagConfigValidator`; unknown keys in the stored JSON
|
||||
blob are preserved on round-trip.
|
||||
|
||||
> The historian backend is the external HistorianGateway (no OtOpcUa-side tag
|
||||
> driver / tag-config editor). See [Historian.md](Historian.md).
|
||||
|
||||
Drivers not yet listed above (e.g. Galaxy — which uses the Galaxy address
|
||||
picker described below) still use the generic raw-`TagConfig`-JSON textarea.
|
||||
@@ -226,7 +227,7 @@ Combined with historization (values are arrays — history of the whole array sn
|
||||
|
||||
- **Array writes** (inbound client→device write of an array value) — tagged for a follow-up phase.
|
||||
- **Multi-dimensional arrays** (`ValueRank > 1`) — not supported; all arrays are 1-D.
|
||||
- **Array historization** — a historized array tag materialises with the correct `Historizing` flag, but the Wonderware sidecar historian treats the value as an opaque blob; per-element history is out of scope.
|
||||
- **Array historization** — a historized array tag materialises with the correct `Historizing` flag, but the historian backend treats the value as an opaque blob; per-element history is out of scope. (Continuous historization is numeric-analog only — array / non-numeric values are not recorded.)
|
||||
|
||||
See the individual driver docs under `docs/drivers/` for per-driver implementation details.
|
||||
|
||||
|
||||
+2
-2
@@ -96,7 +96,7 @@ What the engine pulls driver-tag values from. Reads are **synchronous** because
|
||||
|
||||
Fire-and-forget sink for evaluation results when `VirtualTagDefinition.Historize = true`. Implementations must queue internally and drain on their own cadence — a slow historian must not block script evaluation. `NullHistoryWriter.Instance` is the no-op default. Scripted-alarm emissions flow through `Core.AlarmHistorian` via `Phase7EngineComposer.RouteToHistorianAsync` (a separate concern; see [AlarmTracking.md](AlarmTracking.md)).
|
||||
|
||||
**Equipment-namespace path (H5).** The `Historize` flag is threaded end-to-end on the equipment path: `VirtualTag.Historize` → composer + artifact-decode (byte-parity) → `EquipmentVirtualTagPlan.Historize` → `VirtualTagHostActor`, which calls `IHistoryWriter.Record(nodeId, snapshot)` for every historized result (in addition to publishing the live value). The writer is injectable via DI — `DriverHostActor` resolves `IHistoryWriter` (`TryAddSingleton`, `NullHistoryWriter` default) and threads it into `VirtualTagHostActor`. **The durable AVEVA data-value sink is infra-gated**: the Wonderware historian sidecar exposes only HistoryRead + alarm-event writes (no live-data `WriteDataValues` RPC), so the production default stays `NullHistoryWriter` until that backend exists. A deployment can bind a custom `IHistoryWriter` via DI today.
|
||||
**Equipment-namespace path (H5).** The `Historize` flag is threaded end-to-end on the equipment path: `VirtualTag.Historize` → composer + artifact-decode (byte-parity) → `EquipmentVirtualTagPlan.Historize` → `VirtualTagHostActor`, which calls `IHistoryWriter.Record(nodeId, snapshot)` for every historized result (in addition to publishing the live value). The writer is injectable via DI — `DriverHostActor` resolves `IHistoryWriter` (`TryAddSingleton`, `NullHistoryWriter` default) and threads it into `VirtualTagHostActor`. **This `IHistoryWriter` seam still ships no durable binding** (`NullHistoryWriter` default). Durable continuous historization of driver/virtual values is now handled by the separate `ContinuousHistorizationRecorder` (it taps the dependency-mux value fan-out → a crash-safe FasterLog outbox → the HistorianGateway's `WriteLiveValues` path; see [Historian.md](Historian.md)), not through this seam. A deployment can still bind a custom `IHistoryWriter` via DI.
|
||||
|
||||
## Dispatch integration
|
||||
|
||||
@@ -114,7 +114,7 @@ Per [ADR-002](v2/implementation/adr-002-driver-vs-virtual-dispatch.md) Option B,
|
||||
`ITagUpstreamSource` and `IHistoryWriter` are the two ports the engine requires from its host. Both live in `Core.VirtualTags`. In the v2 actor system:
|
||||
|
||||
- **Upstream-tag feed.** `DependencyMuxActor` (`src/Server/ZB.MOM.WW.OtOpcUa.Runtime/VirtualTags/DependencyMuxActor.cs`) routes `DriverInstanceActor.AttributeValuePublished` events to the `VirtualTagActor` instances that declared interest in those tag refs. Each `VirtualTagActor` holds the in-memory per-tag dependency map; the `IVirtualTagEvaluator` (`RoslynVirtualTagEvaluator`) receives the dependency snapshot synchronously on the actor message thread. Reads of never-pushed dependency refs return `null` values in the dependency snapshot.
|
||||
- **`IHistoryWriter`** — the equipment-namespace path threads `Historize` end-to-end and `VirtualTagHostActor` invokes the injected writer on historized results (H5); the writer is resolved through `DriverHostActor` DI with a `NullHistoryWriter` default. The standalone `VirtualTagEngine` likewise receives `NullHistoryWriter` by default. No *durable* writer ships because the historian sidecar has no live-data write RPC (infra-gated) — see the `IHistoryWriter` section above.
|
||||
- **`IHistoryWriter`** — the equipment-namespace path threads `Historize` end-to-end and `VirtualTagHostActor` invokes the injected writer on historized results (H5); the writer is resolved through `DriverHostActor` DI with a `NullHistoryWriter` default. The standalone `VirtualTagEngine` likewise receives `NullHistoryWriter` by default. No *durable* writer ships on this seam — durable continuous historization now flows through the separate `ContinuousHistorizationRecorder` → HistorianGateway `WriteLiveValues` path (see the `IHistoryWriter` section above and [Historian.md](Historian.md)).
|
||||
|
||||
## Composition
|
||||
|
||||
|
||||
@@ -67,3 +67,33 @@ returning `Bad_WaitingForInitialData` until the rebuilt `ZB.MOM.WW.OtOpcUa.Drive
|
||||
self-contained host publish) is deployed to `E:\ApiInstall\OtOpcUa\` and `OtOpcUaHost` is restarted. Once
|
||||
redeployed, `parts-count`/`parts-required` should go Good (FixedTree + PMC/Parameter still pending the
|
||||
follow-on v3 command work).
|
||||
|
||||
## FixedTree under the Equipment node (feature built 2026-06-26)
|
||||
|
||||
The FOCAS **FixedTree** (Identity / Axes / Spindle / Program / Timers) now surfaces under the equipment as
|
||||
read-only value nodes, via a generic post-connect `ITagDiscovery` injection feature (branch
|
||||
`feat/focas-fixedtree-equipment-injection`; design + plan at
|
||||
[`docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](../plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md)
|
||||
and [`…-injection.md`](../plans/2026-06-26-otopcua-fixedtree-equipment-injection.md)). After the driver
|
||||
connects and its `FixedTreeCache` populates (~0–2 s), nodes are grafted at e.g.
|
||||
`ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber` and `…/FOCAS/Axes/X/AbsolutePosition`, carrying live
|
||||
values through the same path as the authored `parts-count`/`parts-required` tags, and survive redeploys.
|
||||
|
||||
**✅ LIVE-VALIDATED 2026-06-26.** A full self-contained Host overlay built from
|
||||
`feat/focas-fixedtree-equipment-injection` @ `37cac5de` was deployed to `E:\ApiInstall\OtOpcUa`
|
||||
(stop → full backup `E:\ApiInstall\OtOpcUa_bak-20260626111416` → robocopy overlay preserving
|
||||
`appsettings*.json` + `pki\` → restart; `/healthz` 200 Healthy, member Up ADMIN+DRIVER, OPC `:4840`
|
||||
listening). After the FOCAS driver reconnected, the host log recorded
|
||||
`DriverHost …: injected 57 discovered node(s) for driver focas-z-34184 under EQ-3686c0272279` and
|
||||
`AddressSpaceApplier: discovered nodes materialised under EQ-3686c0272279 (folders=14, vars=57)` —
|
||||
no `Exception`/`NotSupportedException`. Verified via the OtOpcUa CLI from a remote client:
|
||||
|
||||
- `browse --recursive ns=2;s=EQ-3686c0272279` → new `FOCAS` subtree (`Identity`, `Axes` X/Y/Z/B/C/AA
|
||||
+ `Actual`, `Spindle/S1`, `Program`, `OperationMode`, `Timers`), stable + idempotent across repeat
|
||||
browses (no churn); the device-host folder (`10.201.31.5:8193`) was collapsed as designed.
|
||||
- Sample reads, all **Good** (`Status 0x0`) with fresh/advancing source times:
|
||||
`FOCAS/Identity/SeriesNumber=G431`, `CncType=31`, `AxisCount=7`,
|
||||
`FOCAS/Axes/X/AbsolutePosition=2801574` (live), `FOCAS/OperationMode/ModeText=TJOG`.
|
||||
- Authored `parts-count`/`parts-required` still Good — no regression.
|
||||
|
||||
Rollback point retained at `E:\ApiInstall\OtOpcUa_bak-20260626111416`.
|
||||
|
||||
@@ -1,156 +1,46 @@
|
||||
# Wonderware Historian Backend
|
||||
# Wonderware Historian Backend — RETIRED
|
||||
|
||||
The Wonderware Historian backend is **not a tag driver** — it has no address
|
||||
space, no `IDriver` lifecycle, and exposes no PLC. It is a **server-side
|
||||
historian sink**: an optional sidecar that gives OtOpcUa read access to AVEVA
|
||||
System Platform (Wonderware) Historian history and a write-back path for alarm
|
||||
events. It runs only when `AlarmHistorian:Enabled=true`.
|
||||
> **This backend has been retired.** The bespoke Wonderware TCP/ArchestrA historian sidecar
|
||||
> (`OtOpcUaWonderwareHistorian`) and its `Driver.Historian.Wonderware*` projects — plus the vestigial
|
||||
> `Historian.Wonderware` driver type — were removed. **HistorianGateway is now the sole historian
|
||||
> backend** for OtOpcUa (read, alarm-write, and continuous historization).
|
||||
|
||||
The host connects to the sidecar over **TCP** (plaintext in dev, optional TLS
|
||||
in prod), so the OtOpcUa host no longer needs to be on the same machine as the
|
||||
sidecar — a remote host on a different VM is fully supported.
|
||||
## What replaced it
|
||||
|
||||
For the sidecar's place in a deployment, see
|
||||
[ServiceHosting.md](../ServiceHosting.md). For the alarm-history store-and-forward
|
||||
flow that drains into it, see [AlarmHistorian.md](../AlarmHistorian.md).
|
||||
OtOpcUa now consumes the external **`ZB.MOM.WW.HistorianGateway`** service through the Gitea-feed
|
||||
**`ZB.MOM.WW.HistorianGateway.Client`** gRPC package (`historian_gateway.v1`), behind the
|
||||
`IHistorianGatewayClient` seam in `ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway`:
|
||||
|
||||
## Architecture
|
||||
- **HistoryRead** → `GatewayHistorianDataSource` over the `ServerHistorian` appsettings section.
|
||||
- **Alarm history** → `GatewayAlarmHistorianWriter` (the gateway `SendEvent` path) behind the durable
|
||||
`SqliteStoreAndForwardSink`; alarm-history `ReadEvents` needs the gateway running with
|
||||
`RuntimeDb:EventReadsEnabled=true`.
|
||||
- **Continuous historization** → a crash-safe FasterLog outbox + `ContinuousHistorizationRecorder`
|
||||
draining to the gateway's `WriteLiveValues` (`ContinuousHistorization` section); needs the gateway
|
||||
running with `RuntimeDb:Enabled=true`.
|
||||
- **Tag provisioning** → `AddressSpaceApplier` fires a non-blocking `IHistorianProvisioning` `EnsureTags`
|
||||
hook for added historized tags.
|
||||
|
||||
```
|
||||
+-------------------------------------------+
|
||||
| OtOpcUa Host (.NET 10 AnyCPU) |
|
||||
| Server.History.IHistoryRouter --read--+--+
|
||||
| Core.AlarmHistorian.SqliteStore | |
|
||||
| AndForwardSink --write----+--+
|
||||
| WonderwareHistorianClient (.NET 10) | |
|
||||
+-------------------------------------------+ |
|
||||
| TCP (optional TLS)
|
||||
MessagePack frames | shared-secret Hello auth
|
||||
v
|
||||
+-------------------------------------------+
|
||||
| OtOpcUaWonderwareHistorian (sidecar) |
|
||||
| net48 / x64 |
|
||||
| TcpFrameServer + HistorianFrameHandler |
|
||||
| HistorianDataSource (reads) |
|
||||
| SdkAlarmHistorianWriteBackend (writes) |
|
||||
| aahClientManaged / HistorianAccess |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
The gateway API key must carry the scopes `historian:read`, `historian:write`, and `historian:tags:write`.
|
||||
|
||||
The split exists because the AVEVA Historian SDK (`aahClientManaged` +
|
||||
native `aahClient.dll`) is .NET Framework 4.8 / x64 — so it lives out-of-process
|
||||
in the sidecar, and everything in the OtOpcUa host stays .NET 10 AnyCPU. The
|
||||
host never references the SDK; it speaks the TCP contract only. Because the
|
||||
transport is TCP, the host and sidecar can run on different machines.
|
||||
## Where to read now
|
||||
|
||||
### Transport & security
|
||||
- **[../Historian.md](../Historian.md)** — the full historian guide (read path, alarm path, continuous
|
||||
historization, config keys, migration note).
|
||||
- **[README.md](README.md)** — driver / backend overview.
|
||||
- **[../ServiceHosting.md](../ServiceHosting.md)** — deployment (the historian backend is the external
|
||||
HistorianGateway, not an installed sidecar).
|
||||
|
||||
The sidecar listens on a configurable TCP port (`OTOPCUA_HISTORIAN_TCP_PORT`,
|
||||
default **32569**) and bind address (`OTOPCUA_HISTORIAN_BIND`, default `0.0.0.0`).
|
||||
`Install-Services.ps1` adds a Windows Firewall inbound rule for the port
|
||||
automatically.
|
||||
## Migration
|
||||
|
||||
**TLS (optional, recommended for cross-machine deployments):**
|
||||
Set `OTOPCUA_HISTORIAN_TLS_ENABLED=true` on the sidecar and supply the server
|
||||
certificate via `OTOPCUA_HISTORIAN_TLS_CERT` (PFX file path, or
|
||||
`LocalMachine\My\<thumbprint>` for a cert already in the machine store) and
|
||||
`OTOPCUA_HISTORIAN_TLS_CERT_PASSWORD` if the PFX is password-protected. On the
|
||||
client/host side set `AlarmHistorian:UseTls=true`; optionally set
|
||||
`ServerCertThumbprint` to pin the server certificate's SHA-1 thumbprint instead
|
||||
of relying on normal CA-chain validation.
|
||||
Deployments that carried the old `ServerHistorian` Wonderware keys must migrate them to the gateway keys:
|
||||
|
||||
**Shared secret (required in all modes):**
|
||||
Regardless of whether TLS is on, the client always sends a `Hello` frame
|
||||
carrying the `SharedSecret`; the sidecar rejects connections where the secret
|
||||
does not match. The Windows-SID pipe ACL from the previous named-pipe transport
|
||||
is replaced by this combination of TLS + shared secret.
|
||||
| Old (Wonderware) key | New (gateway) key |
|
||||
|---|---|
|
||||
| `ServerHistorian:Host` + `:Port` | `ServerHistorian:Endpoint` (`https://host:5222`) |
|
||||
| `ServerHistorian:SharedSecret` | `ServerHistorian:ApiKey` (supply via env `ServerHistorian__ApiKey`) |
|
||||
| `ServerHistorian:ServerCertThumbprint` | `ServerHistorian:CaCertificatePath` (+ `UseTls` / `AllowUntrustedServerCertificate`) |
|
||||
|
||||
**TLS troubleshooting note:** If TLS fails on every connection attempt, the
|
||||
most likely cause is a missing private key or an ACL on the key file — the
|
||||
sidecar loads the certificate with `MachineKeySet` (required for service
|
||||
accounts with no loaded user profile), and `SslStream` defers private-key
|
||||
access to the first handshake, so a bad key surfaces as repeated connection
|
||||
failures (→ exit 2 → NSSM restart), not a startup error.
|
||||
|
||||
## Project split
|
||||
|
||||
| Project | Target | Role |
|
||||
|---------|--------|------|
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/` | net48 / x64 | The **sidecar** (`OutputType=Exe`). Hosts the TCP server, the historian reader, and the alarm-write backend bound to the AVEVA SDK |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client/` | net10.0 | `WonderwareHistorianClient` — the in-host TCP client consumed by the history router and the alarm sink |
|
||||
| `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts/` | net10.0 | `WonderwareHistorianClientOptions` (host, port, TLS, shared secret, timeouts) |
|
||||
|
||||
> The csproj targets **net48 / x64** (`PlatformTarget=x64`) — the AVEVA Historian
|
||||
> 2020 SDK ships an x64 `aahClientManaged` build; the earlier x86 default was an
|
||||
> inherited v1 artifact, not a constraint of the Historian SDK.
|
||||
|
||||
## What it does
|
||||
|
||||
The sidecar exposes two surfaces, both over the same TCP connection:
|
||||
|
||||
### Read path — `IHistorianDataSource`
|
||||
|
||||
`HistorianDataSource` (in the sidecar) reads history through the
|
||||
`aahClientManaged` SDK; `WonderwareHistorianClient` (in the host) implements
|
||||
`IHistorianDataSource` and maps returned samples back to OPC UA `DataValue`s for
|
||||
`Server.History.IHistoryRouter`. The read surface is:
|
||||
|
||||
| Call | Maps to |
|
||||
|------|---------|
|
||||
| `ReadRawAsync` | Raw historical samples for a tag over a time range |
|
||||
| `ReadProcessedAsync` / `ReadAggregateAsync` | Aggregated samples at an interval |
|
||||
| `ReadAtTimeAsync` | Samples at specific timestamps |
|
||||
| `ReadEventsAsync` | Historical events for a source |
|
||||
| `GetHealthSnapshot` | Connection health for the host-side health surface |
|
||||
|
||||
### Write path — alarm-historian write-back
|
||||
|
||||
`WonderwareHistorianClient` also implements `IAlarmHistorianWriter`. Alarm events
|
||||
are drained into the sidecar from `Core.AlarmHistorian.SqliteStoreAndForwardSink`
|
||||
and persisted by `SdkAlarmHistorianWriteBackend` via
|
||||
`HistorianAccess.AddStreamedValue(HistorianEvent, out HistorianAccessError)`. The
|
||||
production writer is wrapped by `AahClientManagedAlarmEventWriter`, which handles
|
||||
batch orchestration and per-event `HistorianAccessError` outcome classification
|
||||
(connection-class errors are retryable; malformed-argument errors are not).
|
||||
|
||||
The alarm write path can be disabled independently of reads by setting
|
||||
`OTOPCUA_HISTORIAN_ALARM_WRITE_ENABLED=false` — the sidecar then rejects
|
||||
`WriteAlarmEvents` frames while still serving history reads.
|
||||
|
||||
## Hosting and IPC
|
||||
|
||||
- **Process**: `OtOpcUaWonderwareHistorian`, installed/managed by
|
||||
`scripts/install/` (`Install-Services.ps1 -InstallWonderwareHistorian`).
|
||||
- **Spawn config**: TCP port and bind address are set via
|
||||
`OTOPCUA_HISTORIAN_TCP_PORT` (default 32569) and `OTOPCUA_HISTORIAN_BIND`
|
||||
(default `0.0.0.0`). TLS is controlled by `OTOPCUA_HISTORIAN_TLS_ENABLED` /
|
||||
`OTOPCUA_HISTORIAN_TLS_CERT` / `OTOPCUA_HISTORIAN_TLS_CERT_PASSWORD`. The
|
||||
shared secret is passed via `OTOPCUA_HISTORIAN_SECRET`. Historian connection
|
||||
settings come from `OTOPCUA_HISTORIAN_SERVER` / `_PORT` / `_INTEGRATED` /
|
||||
`_USER` / `_PASS` etc. (see
|
||||
`src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware/Program.cs`).
|
||||
- **TCP-only mode**: with `OTOPCUA_HISTORIAN_ENABLED!=true` the sidecar boots
|
||||
without loading the SDK at all — used for smoke and IPC tests.
|
||||
- **Wire**: MessagePack-framed request/reply over TCP (optionally TLS). The
|
||||
client proves the shared secret in a `Hello` frame before any history calls.
|
||||
The client owns a single channel with one in-flight call at a time and retries
|
||||
a transport failure once before propagating — broader backoff is the caller's
|
||||
responsibility.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Sidecar unit tests** —
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests/` cover the
|
||||
reader, the alarm-write backend outcome classification, and the TCP frame
|
||||
handler with a faked SDK seam; `TcpRoundTripTests` exercises the plaintext +
|
||||
TLS paths including the bad-secret rejection case.
|
||||
- **Client unit tests** —
|
||||
`tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests/`
|
||||
cover the TCP client + framing against loopback `TcpListener` fixtures.
|
||||
|
||||
## Further reading
|
||||
|
||||
- [ServiceHosting.md](../ServiceHosting.md) — where the sidecar fits in a
|
||||
deployment and how it's installed
|
||||
- [AlarmHistorian.md](../AlarmHistorian.md) — the alarm store-and-forward flow
|
||||
that feeds the write-back path
|
||||
The `AlarmHistorian` section's old Wonderware connection keys (`Host`/`Port`/`UseTls`/`ServerCertThumbprint`/`SharedSecret`)
|
||||
were pruned — remove them from existing configuration; the SQLite store-and-forward knobs are retained and the downstream connection is
|
||||
now sourced from `ServerHistorian`.
|
||||
|
||||
@@ -11,7 +11,7 @@ OtOpcUa is a multi-driver OPC UA server. The Core (`ZB.MOM.WW.OtOpcUa.Core` + `C
|
||||
- `IAlarmSource` — driver-emitted OPC UA A&C events
|
||||
- `IHistoryProvider` — driver-side raw / processed / at-time / events HistoryRead (see [HistoricalDataAccess.md](../v1/HistoricalDataAccess.md))
|
||||
- `IRediscoverable` — driver-initiated address-space rebuild notifications
|
||||
- `IHistorianDataSource` — server-side historian sink registration (the Wonderware Historian backend), distinct from the driver-side `IHistoryProvider` HistoryRead path
|
||||
- `IHistorianDataSource` — server-side historian read backend registration (the HistorianGateway backend), distinct from the driver-side `IHistoryProvider` HistoryRead path
|
||||
|
||||
Each driver opts into only the capabilities it supports. Every async capability call at the Server dispatch layer goes through `CapabilityInvoker` (`Core/Resilience/CapabilityInvoker.cs`), which wraps it in a Polly pipeline keyed on `(DriverInstanceId, HostName, DriverCapability)`. The `OTOPCUA0001` analyzer enforces the wrap at build time. Drivers themselves never depend on Polly; they just implement the capability interface and let the Core wrap it.
|
||||
|
||||
@@ -29,7 +29,7 @@ Driver type metadata is registered at startup in `DriverTypeRegistry` (`src/Core
|
||||
| [TwinCAT](TwinCAT.md) | `Driver.TwinCAT` | B | Beckhoff `TwinCAT.Ads` (`TcAdsClient`) | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IRediscoverable | The only native-notification driver outside Galaxy — ADS delivers `ValueChangedCallback` events the driver forwards straight to `ISubscribable.OnDataChange` without polling. Symbol tree uploaded via `SymbolLoaderFactory` |
|
||||
| [FOCAS](FOCAS.md) | `Driver.FOCAS` | A | Pure-managed `FocasWireClient` — FOCAS/2 Ethernet binary protocol on TCP:8193, inlined into the driver assembly | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IHostConnectivityProbe, IPerCallHostResolver, IAlarmSource | `IWritable` is implemented but read-only by design — `WriteAsync` returns `BadNotWritable` for every point. CNC-shaped data model (axes, spindle, PMC, macros, alarms) not a flat tag map. Previously Tier-C (Host + P/Invoke + shim DLL); retired in the 2026-04-24 migration when the managed wire client landed |
|
||||
| [OPC UA Client](OpcUaClient.md) | `Driver.OpcUaClient` | B | OPCFoundation `Opc.Ua.Client` | IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IAlarmSource, IHistoryProvider, IHostConnectivityProbe | Gateway/aggregation driver — the only driver implementing driver-side `IHistoryProvider` (forwards HistoryRead to the upstream server). Opens a single `Session` against a remote OPC UA server and re-exposes its address space. Owns its own `ApplicationConfiguration` (distinct from `Client.Shared`) because it's always-on with keep-alive + `TransferSubscriptions` across SDK reconnect, not an interactive CLI |
|
||||
| [Historian.Wonderware](Historian.Wonderware.md) | `Driver.Historian.Wonderware` (+ `.Client`, `.Client.Contracts`) | — | `aahClientManaged` write SDK + AVEVA Historian SQL, over a pipe IPC backend | IHistorianDataSource (server-side historian sink) | Not a tag driver — a historian backend that registers `IHistorianDataSource` (`HistorianDataSource : IHistorianDataSource`) to satisfy HistoryRead and to sink tag/alarm history. No `IDriver`/`ITagDiscovery` surface |
|
||||
| [Historian.Gateway](../Historian.md) | `Driver.Historian.Gateway` | — | `ZB.MOM.WW.HistorianGateway.Client` gRPC (`historian_gateway.v1`) | IHistorianDataSource (server-side read backend) + alarm `SendEvent` writer + `WriteLiveValues` recorder + `IHistorianProvisioning` | Not a tag driver — the sole historian backend. Registers `GatewayHistorianDataSource : IHistorianDataSource` for HistoryRead and serves alarm-write + continuous historization through the gateway. No `IDriver`/`ITagDiscovery` surface. (The retired Wonderware sidecar backend it replaced is documented at [Historian.Wonderware.md](Historian.Wonderware.md).) |
|
||||
|
||||
## Per-driver documentation
|
||||
|
||||
@@ -48,8 +48,8 @@ Driver type metadata is registered at startup in `DriverTypeRegistry` (`src/Core
|
||||
- [TwinCAT.md](TwinCAT.md) — Beckhoff TwinCAT (ADS) driver: getting started, native-notification subscription, symbol-tree upload
|
||||
- [OpcUaClient.md](OpcUaClient.md) — OPC UA Client (gateway/aggregation) driver: remote-server session, driver-side HistoryRead forwarding, reconnect behaviour
|
||||
|
||||
- **Historian.Wonderware** (server-side historian sink, not a tag driver) has its own overview page:
|
||||
- [Historian.Wonderware.md](Historian.Wonderware.md) — AVEVA Historian backend: sink registration, HistoryRead dispatch, alarm store-and-forward, deployment prerequisites
|
||||
- **Historian.Gateway** (server-side historian backend, not a tag driver) is documented in the main guide:
|
||||
- [../Historian.md](../Historian.md) — HistorianGateway backend: read-path registration, HistoryRead dispatch, alarm store-and-forward (`SendEvent`), continuous historization (`WriteLiveValues`), `EnsureTags` provisioning, config keys, deployment prerequisites. (The retired Wonderware sidecar backend it replaced: [Historian.Wonderware.md](Historian.Wonderware.md).)
|
||||
|
||||
- The full per-field spec (capability surface, config schema, addressing, data-type maps, connection settings, quirks for every driver) lives in [docs/v2/driver-specs.md](../v2/driver-specs.md). The overview pages above are the short path; that file is the authoritative per-driver reference.
|
||||
|
||||
@@ -68,7 +68,7 @@ Each driver has a dedicated fixture doc that lays out what the integration / uni
|
||||
|
||||
## Related cross-driver docs
|
||||
|
||||
- [HistoricalDataAccess.md](../v1/HistoricalDataAccess.md) — `IHistoryProvider` dispatch, aggregate mapping, continuation points. The OPC UA Client driver is the only driver that implements driver-side `IHistoryProvider` (it forwards HistoryRead to the upstream server); the Aveva Historian path is served server-side by the Wonderware `IHistorianDataSource` sink instead. Other drivers do not implement the interface and return `BadHistoryOperationUnsupported`.
|
||||
- [HistoricalDataAccess.md](../v1/HistoricalDataAccess.md) — `IHistoryProvider` dispatch, aggregate mapping, continuation points. The OPC UA Client driver is the only driver that implements driver-side `IHistoryProvider` (it forwards HistoryRead to the upstream server); the AVEVA Historian path is served server-side by the HistorianGateway-backed `IHistorianDataSource` instead. Other drivers do not implement the interface and return `BadHistoryOperationUnsupported`.
|
||||
- [AlarmTracking.md](../AlarmTracking.md) — `IAlarmSource` event model and filtering. Implemented by Galaxy (native MxAccess alarms, working end-to-end), OPC UA Client, AB CIP, and FOCAS; AB Legacy, Modbus, S7, and TwinCAT have no alarm source.
|
||||
- [Subscriptions.md](../v1/Subscriptions.md) — how the Server multiplexes subscriptions onto `ISubscribable.OnDataChange`.
|
||||
- [docs/v2/driver-stability.md](../v2/driver-stability.md) — tier system (A / B / C), shared `CapabilityPolicy` defaults per tier × capability, `MemoryTracking` hybrid formula, and process-level recycle rules.
|
||||
|
||||
@@ -50,9 +50,10 @@ with a human-readable explanation rather than a false-green TCP-open tick.
|
||||
| **FOCAS** | `cnc_allclibhndl3` via a direct `DllImport("fwlib32")` in the probe. See [degrade semantics](#focas-degrade) below. | `"FOCAS handle OK"` | Deferred — no CNC + FWLIB |
|
||||
| **Galaxy** | gRPC unary call to `GalaxyRepository.TestConnection` on the configured mxaccessgw endpoint. See [auth-rejection rule](#galaxy-auth-rejection) below. | `"gateway gRPC OK"` | `http://10.100.0.48:5120` (mxaccessgw) |
|
||||
|
||||
**Historian.Wonderware** already performed a real handshake (`Hello` → `HelloAck`)
|
||||
before Phase 5 and was not changed by this work. See
|
||||
[`Historian.Wonderware.md`](Historian.Wonderware.md) for details.
|
||||
**Historian.Wonderware** had a TCP `Hello`→`HelloAck` handshake probe before Phase 5, but the
|
||||
Wonderware historian backend (and its driver-type / probe) has since been **retired** — the historian
|
||||
backend is now the external HistorianGateway (a gRPC client package, not a probed `IDriver`). See
|
||||
[`Historian.Wonderware.md`](Historian.Wonderware.md) (retired stub) and [`../Historian.md`](../Historian.md).
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
# OtOpcUa equipment-tag data-plane — why live FOCAS values don't surface (investigation plan)
|
||||
|
||||
**Date:** 2026-06-25
|
||||
**Companion to:** [`2026-06-25-focas-pdu-v3-implementation-plan.md`](2026-06-25-focas-pdu-v3-implementation-plan.md) (the FOCAS PDU-v3 driver work — DONE + deployed) and [`../deployments/wonder-app-vd03-makino-z-34184.md`](../deployments/wonder-app-vd03-makino-z-34184.md).
|
||||
**Goal:** make the deployed Makino tags (`parts-count`/`parts-required` = `MACRO:3901/3902`) actually carry live values over OPC UA, and make the FOCAS FixedTree (Identity/Axes/Timers/…) appear in the served address space.
|
||||
|
||||
## The symptom (precise, observed 2026-06-25 after the v3 deploy)
|
||||
Against `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa` (Security None, anonymous, DisableLogin):
|
||||
- `read` AND a 30 s `subscribe` of `ns=2;s=EQ-3686c0272279/parts-count` (and `/parts-required`) return **`0x80320000` Bad_WaitingForInitialData** — never a value.
|
||||
- A recursive browse of the whole served tree shows ONLY `machining → makino → z-34184 → {parts-count, parts-required}` — **no FixedTree nodes** (Identity/Axes/Timers/Program/…).
|
||||
- Unchanged by host-restart, a full AdminUI re-deploy (`12e0d528`, Sealed/In-sync), and a driver `Restart`.
|
||||
- A ~15 s box-side watch saw **no 250 ms-cadence TCP connection** to `10.201.31.5:8193` — only the occasional probe connect.
|
||||
|
||||
## What is RULED OUT (do not re-investigate)
|
||||
- **The FOCAS PDU-v3 driver.** Proven working: the rewritten `FocasDriverProbe` does a real initiate + `cnc_statinfo` and the AdminUI shows **DRIVER STATUS: HEALTHY, last success Ns ago**; and on the dev box every surface (sysinfo / axes / dynamic / macros / timer / PMC / servo / alarms) reads correctly through the same `WireFocasClient`. The wire path talks v3 to the Makino.
|
||||
- **Deployment / Akka roles.** Re-deploy sealed with no "task canceled"; the node has admin+driver roles; the address space (the 2 UNS tags) is served.
|
||||
- **Reachability.** TCP 8193 reachable from the box; OPC 4840 reachable from the dev box.
|
||||
|
||||
So the problem is the OtOpcUa **data plane**: the driver's *control/probe plane* runs, but its *data plane* (the FixedTree poll loop output + the equipment-tag value source) is not reaching the served address space. This was present last session too — it was masked by the v3 reject; with v3 now working it stands alone.
|
||||
|
||||
## Leading hypotheses (to confirm/refute, not assume)
|
||||
- **H1 — FixedTree bootstrap is stuck/throwing.** `FocasDriver.FixedTreeLoopAsync` is started at `FocasDriver.cs:160` inside `InitializeAsync` (which demonstrably ran — the probe loop at `:137` is alive). Its bootstrap `while (state.FixedTreeCache is null)` loop (~`:650`) calls `GetSysInfoAsync`/`GetAxisNamesAsync`/optional probes; if one throws it retries every 2 s and the cache never populates → no FixedTree nodes, no poll. The brief 2 s-spaced connects could explain "no steady 250 ms connection."
|
||||
- **H2 — FixedTree nodes aren't projected under the Equipment model.** Galaxy/FOCAS are now *standard Equipment-kind drivers*; the served tree is the UNS/Equipment projection (configured tags only). Driver `DiscoverAsync` FixedTree auto-nodes may only live in a raw-driver namespace that the UNS browse doesn't show — i.e. FixedTree-under-Equipment may be unsupported-by-design, not a bug.
|
||||
- **H3 — equipment-tag value source never primed.** A point-read returns the server's cached value; with no active poll/subscription seeding it, that's `WaitingForInitialData`. But a 30 s `subscribe` ALSO stayed bad — so either the monitored-item → `FocasDriver.ReadAsync` wiring isn't firing, or `ReadAsync(MACRO:3901)` returns no value on the box. (Analogy: the ScadaBridge DCL seed-ordering / static-tag false-bad class of bug — see that memory.)
|
||||
- **H4 — a data-plane gate.** A role/active-node gate (cf. "data-plane roles need `Security:Ldap:GroupToRole` mapped or write/ack is inert") could suppress the read/poll on this single fused node under DisableLogin.
|
||||
|
||||
## ⚠️ Observability is the bottleneck
|
||||
There are currently **no OtOpcUa app logs on the box** (Windows eventlog shows only NSSM start/stop; Serilog has no file sink in this deployment — cf. the MxGateway windev note). **Getting runtime logs is the single highest-leverage step** and gates H1/H3/H4.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Offline code trace of the data plane (do FIRST; free, no box/CNC)
|
||||
Trace, in the repo on the dev box, the two paths end-to-end. This may reveal the root cause with zero box access.
|
||||
1. **Equipment-tag read path:** from an OPC UA `Read`/`CreateMonitoredItem` on `ns=2;s=EQ-…/parts-count` → the server's address-space node → the "FullName→NodeId router" → `FocasDriver.ReadAsync`. Find: who invokes the driver read, whether point-reads vs subscriptions differ, and where/whether a value is cached vs `WaitingForInitialData` is returned. Entry points: `FocasDriver.cs:247` (resolver-produced equipment tags), `FocasDriver.ReadAsync`, and the server-side poll-group / monitored-item sampler (grep `PollGroup`, `MonitoredItem`, the Equipment namespace value provider).
|
||||
2. **FixedTree path:** `FocasDriver.FixedTreeLoopAsync` (`:644`) → `state.FixedTreeCache` → `TryReadFixedTree` → how those nodes are added to the address space. Determine whether FixedTree nodes are emitted into the **Equipment/UNS projection** at all, or only a raw-driver namespace (resolves H2). Grep `DiscoverAsync`, `FixedTreeReference`, and the address-space builder for the Equipment kind.
|
||||
3. **Driver lifecycle/gates:** confirm what conditions start/suppress the data poll vs the probe loop (active-node, role, enabled flags) — `FocasDriver.cs:87–181`, the cluster/active-node gate.
|
||||
- **Output:** a written root-cause hypothesis (which of H1–H4) + the specific code site, OR a precise list of what only box logs can disambiguate.
|
||||
|
||||
### ✅ Phase 1 FINDINGS (2026-06-25, offline subagent code-trace — DONE)
|
||||
|
||||
| Hypothesis | Verdict | Confidence |
|
||||
|---|---|---|
|
||||
| **H2** — FixedTree nodes not projected under Equipment model | **CONFIRMED root cause of symptom #2 — by-design** | proven from code |
|
||||
| **H3** — equipment-tag value source never primed | **CONFIRMED mechanism of symptom #1**; routing code is self-consistent → break is **runtime**, not static | mechanism proven; exact break needs box logs |
|
||||
| **H4** — role/active-node data-plane gate | **REFUTED** for the read/subscribe path | proven from code |
|
||||
| **H1** — FixedTree bootstrap stuck/throwing | best fit for symptom #3 (no 250 ms connect), but **moot** for #2 and **not** the cause of #1 | strongly-suggested; needs logs |
|
||||
| **H5 (new)** — FOCAS tag with blank `deviceHostAddress` → `BadNodeIdUnknown` | **latent trap** — yields a *Bad* value, not WaitingForInitialData, so it's the *next* failure, not the current symptom | proven from code |
|
||||
|
||||
**Why symptom #2 (no FixedTree nodes):** FixedTree nodes are emitted **only** by `FocasDriver.DiscoverAsync` (`FocasDriver.cs:391-494`, gated on `FixedTreeCache`), and `DiscoverAsync` is **never invoked** by the deployed equipment-kind address-space build. Production materializes the served tree **purely from Config-DB Tag rows** (`EquipmentNodeWalker.cs:24-25,161-165`; `OpcUaPublishActor.RebuildAddressSpace` → `MaterialiseEquipmentTags(composition)` at `OpcUaPublishActor.cs:326-337`). No runtime caller of `IDriver.DiscoverAsync` exists. ⇒ **By-design gap, not a bug.** Surfacing FixedTree under the Equipment tree is a *feature* (author explicit Tag rows per signal, or extend the composition pipeline to merge `ITagDiscovery` output — a substantial `AddressSpaceComposer`/`AddressSpaceApplier` change).
|
||||
|
||||
**Why symptom #1 (`0x80320000` even under a 30 s subscribe):** equipment-tag node value is **push-only**. `0x80320000` is the materialization **seed** (`OtOpcUaNodeManager.EnsureVariable:1400-1402`); there is **no `OnReadValue`/server-side sampler** for equipment tags, so both Read and MonitoredItem serve the cached node value. The seed is overwritten **only** by `ForwardToMux → WriteValue` (`OtOpcUaNodeManager.cs:261-281`), which writes even Bad/null. `StatusFromQuality` maps Bad → `0x80000000`, **not** `0x80320000`. ⇒ A persistent `0x80320000` **proves no `AttributeValueUpdate` ever landed** for those two NodeIds. The routing chain (`DriverHostActor.PushDesiredSubscriptions` → `_nodeIdByDriverRef` → `DriverInstanceActor.ResubscribeDesired` → `PollGroupEngine` forceRaise → `OnDataChangeForward` → `ForwardToMux`) is **provably self-consistent for FOCAS** (`FocasTagConfigModel` writes no top-level `FullName` → `ExtractTagFullName` returns the raw JSON blob → `FocasEquipmentTagParser.TryParse` resolves it; subscribed ref == published ref == routing-map key == NodeId, all from the same `t.FullName`). So a static routing-map defect is **refuted**; the break is **runtime**: either the FOCAS child was never subscribed to those refs, or `_nodeIdByDriverRef` lacked the `(DriverInstanceId, FullName)` key at publish time — most likely a **`DriverInstanceId` attribution mismatch** between the equipment Tag rows and the spawned `DriverInstance`.
|
||||
|
||||
**What only box logs (or a ConfigDb snapshot read) can disambiguate for symptom #1 — capture these on the driver-role node after a deploy:**
|
||||
1. `DriverHostActor` info *"SubscribeBulk pushed {Refs} references across {Drivers} driver(s)"* (`DriverHostActor.cs:1058`) — `Refs<2` ⇒ equipment Tag `DriverInstanceId` ≠ spawned FOCAS child id (attribution mismatch).
|
||||
2. `DriverInstanceActor` info *"subscribed to {Count} refs"* (`DriverInstanceActor.cs:571`) — absent/`0` ⇒ `_desiredRefs` empty or never reached `Connected`.
|
||||
3. `DriverHostActor` debug *"no equipment-tag NodeId for ({Driver},{Ref}) — value dropped"* (`DriverHostActor.cs:550`) — present ⇒ `_nodeIdByDriverRef` key miss.
|
||||
4. `FocasDriver` debug *"FOCAS fixed-tree bootstrap failed … retrying"* every ~2 s (`FocasDriver.cs:686`) ⇒ confirms H1 (`cnc_sysinfo`/`cnc_rdaxisname` failing on the Makino). Needs Debug level + logger-constructed wire client.
|
||||
5. A driver-side read of `MACRO:3901` (live Makino): `BadNodeIdUnknown` ⇒ H5 blank-`deviceHostAddress` trap; Good value ⇒ wire path fine, gap is purely the OPC-UA equipment subscription wiring.
|
||||
|
||||
**Cheaper-than-Phase-2 lead:** the prime suspect (Tag-row `DriverInstanceId` vs `DriverInstance` row mismatch) is checkable by reading the **deployed ConfigDb snapshot** (`OtOpcUaConfig` on `wonder-sql-vd03:1433`) — no service restart, no log-sink change. Worth doing before/instead of the Phase 2 log-sink change.
|
||||
|
||||
**Suggested minimal-fix shapes (NOT implemented):**
|
||||
- Symptom #1: depends on which log line/row fires — if attribution miss, it's a **config/snapshot re-bind** (no code). If a real wiring gap, localize first.
|
||||
- H5 latent: in `FocasDriver.ReadAsync`/`WriteAsync` device lookup (`:289`,`:346`), when `DeviceHostAddress` is blank and `_devices.Count==1`, resolve to the sole device instead of `BadNodeIdUnknown` (or make the AdminUI field mandatory).
|
||||
- Symptom #2 (H2): no fix — by-design; surfacing FixedTree is a feature decision.
|
||||
|
||||
### ✅ ConfigDb snapshot read (2026-06-25, symptom #1 path — chosen over box logs)
|
||||
Read live `OtOpcUaConfig` on `wonder-sql-vd03` (query run on-box so the SQL password never left the box). Ground truth:
|
||||
- **Equipment** `EQ-3686c0272279` (`z-34184`): `DriverInstanceId=focas-z-34184`, `DeviceId=NULL`, `Enabled=True`, `UnsLineId=makino`.
|
||||
- **Tags** `parts-count`/`parts-required`: both `DriverInstanceId=focas-z-34184`, `DeviceId=NULL`, `DataType=Double`, `AccessLevel=Read`, `FolderPath=NULL`, **`PollGroupId=NULL`**, `TagConfig={"deviceHostAddress":"10.201.31.5:8193","address":"MACRO:390{1,2}","dataType":"Float64"}`.
|
||||
- **DriverInstance**: one row `focas-z-34184` (`DriverType=Focas`, `Enabled=True`, ns `dev-equipment`, cluster `DEV`).
|
||||
- **Device** table: **0 rows** — NORMAL: `FocasDriver` builds `_devices` from `_options.Devices` (DriverConfig JSON) not the Device table (`FocasDriver.cs:97`).
|
||||
|
||||
**Refuted by this read:** prime-suspect `DriverInstanceId` attribution mismatch (matches exactly) and H5 blank-`deviceHostAddress` (present). The deployed config is **clean**. ⇒ symptom #1 is a pure value-flow-plumbing break. New live leads: **`PollGroupId=NULL`** on both tags (is a poll group required to subscribe/poll?) and the **resolver-registration** path (equipment-tag refs are "resolver-produced, not seeded at `InitializeAsync`" per `FocasDriver.cs:247` — does poll-time `TryResolve` of the JSON-blob ref ever succeed?). A second offline subagent trace of DriverHostActor↔DriverInstanceActor↔PollGroupEngine↔resolver is running to pin the exact broken link.
|
||||
|
||||
### ✅ FixedTree feature (symptom #2) — BUILT 2026-06-26 (architecture reality below)
|
||||
Mapped the composition pipeline. Two address-space paths exist: (1) **Equipment/UNS projection** `AddressSpaceComposer.Compose` (config entities only) → `AddressSpaceApplier.MaterialiseEquipmentTags` → the served `ns=2` tree where `EQ-…` lives; (2) **raw-driver namespace** `GenericDriverNodeManager.BuildAddressSpaceAsync` → `driver.DiscoverAsync(IAddressSpaceBuilder)`. **Path 2 is DEAD: `BuildAddressSpaceAsync` has no runtime caller and `OpcUaApplicationHost.PopulateAddressSpaces` (its referenced caller) no longer exists.** Even `GalaxyDriver.DiscoverAsync` (`:588`) is reachable only via that dead path — Galaxy surfaces its hierarchy by being **authored as config equipment/tags**, not via discovery. ⇒ In the current Equipment-kind model **every served node is config-driven; `ITagDiscovery`/`DiscoverAsync` is legacy/dead for serving.** So "build the FixedTree feature" is NOT re-wiring an existing path — it's a **new dynamic-node-injection capability** into the Equipment projection, and it must solve a **timing problem**: composition runs at deploy/apply (before the driver connects), but FixedTree data only exists after the driver's async `FixedTreeCache` bootstrap. The far cheaper alternative that yields the same visible result is to **author FixedTree signals as config Tag rows** (each bound to a FOCAS fixed-tree reference) — same mechanism every other equipment tag uses. (The user chose to **build the dynamic feature** over the config-rows alternative.)
|
||||
|
||||
**✅ BUILT (2026-06-26).** Implemented as a generic **post-connect `ITagDiscovery` injection pipeline**: when a driver reaches `Connected`, `DriverInstanceActor` runs bounded re-discovery into a capturing `IAddressSpaceBuilder` and ships `DiscoveredNodesReady` to `DriverHostActor`, which maps the nodes under the equipment (`EQ-…/FOCAS/…`, read-only), extends the `_nodeIdByDriverRef` routing map, and tells `OpcUaPublishActor` to incrementally materialise them — reusing the existing materialize→subscribe→poll→push pipeline (no full rebuild). Survives redeploys (re-applied at the tail of `PushDesiredSubscriptions`) and restarts (re-discovered on reconnect). Design: [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md); implementation plan (11 bite-sized tasks, all green): [`2026-06-26-otopcua-fixedtree-equipment-injection.md`](2026-06-26-otopcua-fixedtree-equipment-injection.md). **Offline-complete** on branch `feat/focas-fixedtree-equipment-injection` (solution build 0 errors / 0 warnings; Runtime.Tests 312, OpcUaServer.Tests 304, FOCAS 247 + an end-to-end injection+value-flow test, all green). The review chain caught + fixed three real defects (a `DriverDataType→OPC-UA-type` string mismatch, a `Server.ReportEvent`-under-lock deadlock, and a `ConfigureAwait(false)` off-actor-context crash for async drivers). **Live wonder validation pending** (deploy the current host + browse `ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber`, `…/FOCAS/Axes/X/AbsolutePosition`).
|
||||
|
||||
### 🎯 ROOT CAUSE — symptom #1 (CONFIRMED, 2026-06-25, 2nd subagent trace + code verify)
|
||||
**The FOCAS poll read hangs forever because (1) all wire I/O for a device shares one socket with NO serialization, and (2) the steady-state read has NO timeout.**
|
||||
|
||||
- **Unsynchronized shared socket:** `FocasDriver.EnsureConnectedAsync` (`:1101-1128`) returns the single `device.Client` with no I/O mutex. `DeviceState.ProbeLock` (`:1172`) only synchronizes probes with each other. Four independent loops issue wire ops on that same socket concurrently: the equipment poll (`PollGroupEngine`, 1 s — `SubscriptionPublishingInterval` const `DriverHostActor.cs:58`), the FixedTree loop (250 ms / 2 s bootstrap), the probe (5 s), the recycle loop. FOCAS/2 is strict request-response on one socket → concurrent unsynchronized reads interleave; one reader consumes another's response PDU and the victim read blocks waiting for bytes that never come.
|
||||
- **No read timeout:** the poll calls `FocasDriver.ReadAsync` (`:295-308`) → `WireFocasClient.ReadAsync` (`:417`) → `ReadMacroAsync` **without** the optional `timeout` → `FocasWireClient.CreateCallTimeout` (`:843-848`) skips `CancelAfter` when `timeout` is null → `ReadExpectedPduAsync` awaits the socket under a token that never fires. The blocked first `forceRaise` poll (`PollGroupEngine.cs:119`) never completes → `onChange` never fires → the `BadWaitingForInitialData` materialization seed (`OtOpcUaNodeManager.cs:1400`) is never overwritten → permanent `0x80320000`. (Connect `:1119` and probe `:604-610` ARE bounded — only the steady-state read isn't.)
|
||||
|
||||
**Why it works on the dev box but not deployed:** the dev-box harness/CLI does ONE read at a time with no FixedTree loop running → no socket collision → macros read fine (proven in the v3 work). Deployed, the FixedTree loop races the equipment poll on one socket → collision → hung read.
|
||||
|
||||
**Why both tags fail together:** they're in one driver's single 1 s subscription batch; `PollOnceAsync` reads them sequentially and hangs on the first, so neither reaches `onChange`.
|
||||
|
||||
**Ruled out:** `PollGroupId=NULL` is a red herring — verified at `ConfigComposer.cs:38` (snapshots all tags, no poll-group filter), `AddressSpaceComposer.Compose` (never reads `PollGroupId`), and `DriverHostActor.PushDesiredSubscriptions:965-973` (groups by `DriverInstanceId`, constant 1 s interval, no poll-group keying). Routing-key mismatch ruled out: subscribe ref == routing key == `onChange` ref (all from the same `FullName`). Not-subscribed ruled out by elimination: a *served* node implies it's in the same `ParseComposition` artifact used for subscribe; and every *deterministic* `ReadAsync` outcome pushes a specific Bad code (`BadNodeIdUnknown`/`BadCommunicationError`/`BadNotSupported`), none of which is `0x80320000` — only a hung (never-returning) read leaves the seed intact.
|
||||
|
||||
**PROPOSED FIX (two parts, FOCAS-driver-only, single managed DLL, no migration/proto change — same low-blast-radius deploy path as the v3 work):**
|
||||
1. **Serialize per-device wire I/O** — an async gate (`SemaphoreSlim`) on `DeviceState` held around each `EnsureConnected + wire op` so the equipment poll, FixedTree loop, probe, and recycle never collide on the one socket. *This is what makes values actually read Good.* MUST be paired with #2 (a lock around an unbounded read would deadlock all I/O).
|
||||
2. **Bound every steady-state wire read/write** with `_options.Timeout` (mirror the probe's linked CTS at `:604-610`, or thread `timeout` through `WireFocasClient.ReadAsync`→`ReadMacroAsync`). Converts any stall into a recoverable `BadCommunicationError` push (overwrites the seed, downgrades health → observable + self-healing) instead of permanent silence.
|
||||
|
||||
**Confidence:** the two structural gaps are *proven from code*; that their interaction is the active trigger is *strongly-suggested* (works single-threaded on dev box, hangs deployed; fits every observation). Definitive live confirmation = the fix makes the tags leave `0x80320000` (Good, or recoverable BadComm if the macro read genuinely fails). **This is a debugging fix headed for a production CNC node — implement + unit-test (serialization + read-timeout) locally, then deploy to wonder and verify, per the diagnose-before-deploy discipline.**
|
||||
|
||||
### Implementation + deploy status (2026-06-26)
|
||||
- **Fix implemented** on branch `fix/focas-poll-io-serialization` (off `feat/focas-pdu-v3`): new `SynchronizedFocasClient` decorator (per-device `SemaphoreSlim` gate + per-call `_options.Timeout`) wired into `FocasDriver.EnsureConnectedAsync`; `ReadAsync`/`WriteAsync` now map a per-call timeout (OCE while caller token live) → `BadCommunicationError` instead of rethrowing. **243 FOCAS tests green** (8 new in `FocasIoSerializationTests.cs` + 1 sibling in `FocasReadWriteTests`), full solution builds 0 errors.
|
||||
- **Live baseline re-confirmed** via OtOpcUa CLI from the Mac: both tags `[80320000]`.
|
||||
- **Deployed** the Release DLL to wonder (single managed-DLL swap; v3 DLL 222208 B → fix DLL 228864 B; backup `E:\ApiInstall\OtOpcUa\_focasbak-pre-iofix-20260626T041913.dll`; service Running, OPC 4840 listening).
|
||||
- **BLOCKER for verification:** after the swap+restart the FOCAS driver is **not connecting at all** (zero TCP to `10.201.31.5:8193` at 150 s uptime — not even a probe connect), so tags still `[80320000]`.
|
||||
- **Deploy API enabled** (user-approved): added `Security__DeployApiKey` (64-char generated, value on-box only) to the `OtOpcUaHost` service `Environment` REG_MULTI_SZ + restart; env backed up to `_envbak-pre-deploykey-20260626T043001.txt`. Headless `POST http://localhost:9000/api/deployments` (key read from registry on-box) returned **HTTP 202 Accepted**, sealed deployment `ef384b04…`.
|
||||
- **Tags STILL `[80320000]`** after the sealed deploy. **Root of the live blocker (from box logs):** the service writes Serilog to `C:\Windows\System32\logs\otopcua-<date>.log` (relative-path-from-service-CWD bug — pending.md D2). Today's log shows the **admin/publish side only**: `Phase7Applier: hierarchy materialised (areas=1,lines=1,equipment=1)` + `equipment tags materialised (tags=2)` at each restart (04:19, 04:30) — which is why the 2 tags are served with the `0x80320000` seed. But across the **entire** day's log there are **ZERO** driver-side lines: `DriverHost`=0, `SubscribeBulk`=0, `subscribed to`=0, `Focas`=0, `GenericDriver`=0 (the 13 `DriverInstance` hits are all SQL `FROM [DriverInstance]`). **⇒ the DriverHostActor / driver-role side is not spawning the FOCAS driver at all on this node today**, so there is no poll, no value push, permanent seed. Health: `/healthz` Healthy, Akka member **Up**, `admin-leader` **Active**; roles env intact (`Cluster__Roles__0=admin`, `__1=driver`, seed=self:4053); `DOTNET_ENVIRONMENT=Production`. The admin half is fully alive; only the driver half is silent.
|
||||
- **This is a SEPARATE blocker from the I/O fix** (which is correct, unit-tested, and deployed — but cannot be exercised until the driver actually polls).
|
||||
|
||||
### Driver-host activation diagnosis (2026-06-26, Debug-logging pass + code reads)
|
||||
- **Debug Serilog enabled** (env `Serilog__MinimumLevel__Default=Debug` + EF/Microsoft/System→Warning; env bak `_envbak-pre-debug-*.txt`). Revealed: **NO exceptions / load errors anywhere** (the FOCAS I/O-fix DLL is NOT the cause), but also **ZERO Akka lines** in Serilog (`Akka`/`Member`/`akka.tcp`/`singleton` = 0).
|
||||
- **`DriverHostActor` logs via `Context.GetLogger()` (Akka `ILoggingAdapter`), and there is NO `Akka.Logger.Serilog` bridge anywhere in the app** (verified across `*.cs`/`*.csproj`/HOCON). So Akka actor logs go to the default console logger → **discarded for a Windows service → invisible in BOTH Production AND Development.** `appsettings.Development.json` only changes Serilog min-levels (+ `Ldap:DevStubMode=true`); it adds no Akka bridge. ⇒ **`DOTNET_ENVIRONMENT=Development` would NOT surface the driver host** (held off — futile + flips DevStubMode on prod).
|
||||
- **The `DriverHostActor` IS instantiated** (unconditionally, same `system.ActorOf` block as the working `OpcUaPublishActor` at `Runtime/ServiceCollectionExtensions.cs:~232`). So it runs — it just isn't producing a connected FOCAS driver, and logs via Akka so the reason is invisible.
|
||||
- **STRONG HYPOTHESIS (explains everything):** on bootstrap the `DriverHostActor` *recovers* its persisted state (its own logs name `entering Steady` / `recovered Applied state at rev {Rev}`), but a process restart has already killed the live `DriverInstanceActor`s + their FOCAS sockets — and **recovery does NOT re-instantiate them**. Drivers re-spawn only on a deployment dispatch whose **revision changes**. This config is now **stable** (`revisionHash 924b59097eba…` is identical across `0c2db588`/`ef384b04`/`a2a84646` — every `POST /api/deployments` returns 202 but with the SAME rev), so repeated deploys are no-ops for the driver host → after any restart there are **no live drivers** and nothing re-spawns them → zero Makino TCP → tags frozen at the `0x80320000` materialization seed. The **prior session got HEALTHY because the config was being *authored*** (each deploy had a NEW rev → forced a spawn); once authoring stopped, a restart leaves the driver side dark. ⇒ likely the real reason "host-restart didn't change it" in the original symptom.
|
||||
- **Implication:** to exercise the I/O fix live, force a driver re-spawn by bumping the revision (e.g. toggle the FOCAS DriverInstance `Enabled` off→on, or any benign config edit, then deploy) — reversible. Separately, "restart should re-spawn applied drivers" + "Akka logs should bridge to Serilog" are real OtOpcUa robustness/observability gaps (the latter = the observability bottleneck this plan flagged).
|
||||
- **Box env mutations still in place (to revert when done):** `Security__DeployApiKey` (deploy API), `Serilog__MinimumLevel__*` (Debug). Backups: `_envbak-pre-deploykey-*`, `_envbak-pre-debug-*`.
|
||||
|
||||
### Akka→Serilog observability attempt (2026-06-26) — code done, prod deploy BLOCKED by self-contained layout
|
||||
- **Code change (correct, builds 0 errors):** added `Akka.Logger.Serilog` 1.5.60 (deps all satisfied by existing pins — Akka 1.5.62 / Serilog 4.3.1) to `Directory.Packages.props` + the Cluster csproj, and `loggers=["Akka.Logger.Serilog.SerilogLogger, …"]` + `loglevel=DEBUG` + `logger-startup-timeout=30s` to the embedded `Resources/akka.conf`. This bridges the DriverHostActor's Akka `ILoggingAdapter` into the Serilog file sink.
|
||||
- **Prod deploy FAILED TWICE (both auto-rolled-back; service is UP + 4840 listening the whole time after each):**
|
||||
1. Cluster.dll + Akka.Logger.Serilog.dll swap → crash `FileNotFoundException: Could not load 'Akka.Logger.Serilog 1.5.60'` from `Serilog.Settings.Configuration.ConfigurationReader.LoadConfigurationAssemblies` (the deployed `OtOpcUa.Host.deps.json` didn't list the new assembly; Serilog's `*.Serilog` config-assembly scan then fails fatally).
|
||||
2. Added the updated `OtOpcUa.Host.deps.json` → crash **"Could not resolve CoreCLR path."** ⇒ the box deployment is **SELF-CONTAINED** (bundles its own runtime), so a `dotnet build` (framework-dependent) deps.json breaks the apphost's runtime resolution.
|
||||
- **Conclusion:** adding a NEW assembly to this box requires a **full self-contained publish-overlay** (match the box layout; preserve `appsettings*`/`data\`), NOT a DLL/deps.json swap. That's a heavy/risky prod op for what is *diagnostic* observability of the (separate, pre-existing) driver-host re-spawn issue.
|
||||
- **Prod state now (verified healthy):** FOCAS **I/O-fix DLL still deployed** (228864 B — untouched by these rollbacks), Cluster.dll + deps.json restored to baseline, `Akka.Logger.Serilog.dll` renamed `.disabled`, service Running + OPC 4840 listening. Deploy-API key + Debug-Serilog env still set.
|
||||
- **RECOMMENDED PIVOT:** the cheap, no-prod-change way to validate the I/O fix is to **force a driver re-spawn via a benign config rev-bump** (the leading hypothesis); and the *proper* driver-host re-spawn fix + the Akka-observability are best developed in a **local docker-dev repro** (safe, full logging, one clean publish-overlay deploy at the end) rather than iterated on the production CNC node. **(User chose: move proper fix to local docker-dev.)**
|
||||
|
||||
### 🎯 ROOT CAUSE of the driver-not-spawning — STALE DEPLOYED BINARY (CONFIRMED 2026-06-26)
|
||||
The DriverHostActor's bootstrap-recovery **already re-spawns drivers in the current source**: `BootstrapRecover` `case Applied:` logs `"recovered Applied state at rev"` and calls `RestoreApplied()` (`DriverHostActor.cs:449-457`), which does `ReconcileDrivers` (re-spawn) + `RebuildAddressSpace` + `PushDesiredSubscriptions` (`:910-918`) — comment: *"the in-memory driver children + OPC UA address space were lost on restart … re-spawn … instead of waiting for a config change (whose identical-config revision would no-op)."* Added by `b1b3f3ff` (2026-06-06) + `397f9b78` (2026-06-07); both ancestors of `feat/focas-pdu-v3`; covered by existing `Restore_on_bootstrap_*` tests.
|
||||
- **The DEPLOYED wonder `Runtime.dll` is the June-16 base install** (`mtime 2026-06-16 13:05:46`; only the FOCAS *driver* DLL was ever swapped). Binary string scan: `RestoreApplied`/`ReconcileDrivers` method names PRESENT, but the bootstrap-restore log strings `"recovered Applied state at rev"` + `"restored served state for applied deployment"` are **ABSENT** ⇒ the deployed binary **predates b1b3f3ff** and does **NOT** restore served state / re-spawn drivers on bootstrap. **That is exactly why, on this stable-config box, a restart leaves the driver side dark** (and why only config-authoring re-deploys ever lit it up).
|
||||
- **⇒ No new driver-host fix is needed — it already exists in source.** The proper remedy = **deploy the current Host** (full self-contained publish-overlay onto `E:\ApiInstall\OtOpcUa`, preserving `appsettings*`/`data\`), which in one shot delivers: (1) `RestoreApplied`-on-bootstrap (drivers re-spawn after restart), (2) the FOCAS I/O serialization fix (symptom #1), (3) the Akka→Serilog bridge (observability). The earlier DLL-swap crashes were purely the self-contained-layout mismatch — a full publish-overlay is internally consistent and avoids them.
|
||||
- **PLAN:** confirm the current Host build boots cleanly + re-spawns drivers on restart in **docker-dev** (safe), then do ONE self-contained publish-overlay to wonder + verify tags leave `0x80320000`.
|
||||
|
||||
### ✅ docker-dev confirmation (2026-06-26) — current build re-spawns on bootstrap + Akka bridge works
|
||||
Built the current source into the docker-dev image (`otopcua-host:dev`), booted `central-1` (fused admin+driver, like wonder). Results:
|
||||
- **Boots cleanly** with `Akka.Logger.Serilog` (the prod DLL-swap crashes were purely the self-contained-layout mismatch, now confirmed).
|
||||
- **Akka→Serilog bridge works** — but needed TWO fixes beyond the package: (1) Akka.Hosting owns logger setup so HOCON `akka.loggers` is ignored → wire via `ConfigureLoggers(setup => { setup.LogLevel=DebugLevel; setup.ClearLoggers(); setup.AddLogger<SerilogLogger>(); })` in `WithOtOpcUaClusterBootstrap`; (2) `AddZbSerilog` registers Serilog as the MEL provider but does NOT set the static `Serilog.Log.Logger` (which `Akka.Logger.Serilog` writes to, AND which the Program.cs startup banner uses) → set `Serilog.Log.Logger = app.Services.GetRequiredService<Serilog.ILogger>()` in Program.cs right after `Build()`. With both, the startup banner + full Akka cluster/DriverHost logs now emit. *(The `Log.Logger`-unset gap is a latent bug in the shared `AddZbSerilog` lib affecting all 3 apps' static-`Log` calls — worth a follow-up there.)*
|
||||
- **`RestoreApplied` re-spawns drivers on bootstrap — CONFIRMED live:** `DriverHost central-1: recovered Applied state at rev …` → `spawned GalaxyMxGateway/Modbus/OpcUaClient driver` (×3) → `SubscribeBulk pushed 5 references across 3 driver(s)` → `restored served state for applied deployment … on bootstrap` → `DriverInstance …: subscribed to N refs`. These are the exact strings ABSENT from the stale wonder binary. ⇒ deploying the current Host fixes the wonder driver-not-spawning blocker.
|
||||
|
||||
### Deploy plan (self-contained publish-overlay → wonder)
|
||||
- Local: `dotnet publish …Host.csproj -c Release -r win-x64 --self-contained true -p:PublishSingleFile=false` (self-contained = brings its own runtime; no version-match concern with the box).
|
||||
- Zip + SFTP to `win64`. Box overlay (self-healing): full app-dir backup → robocopy publish over `E:\ApiInstall\OtOpcUa` **EXCLUDING `pki\` (OPC server cert) + `appsettings*.json`** → start → verify (4840 + tags) → AUTO-ROLLBACK from backup if unhealthy. Env (deploy key, Debug logging) is registry-side, untouched by the file overlay.
|
||||
- Then revert the Debug-Serilog env (now superseded; verbose) and verify `parts-count`/`parts-required` read Good (or recoverable BadComm), and that the FixedTree/driver values flow.
|
||||
|
||||
### ✅✅ SYMPTOM #1 FIXED + LIVE-VALIDATED ON WONDER (2026-06-26)
|
||||
After the self-contained overlay (current Host) + two light single-DLL FOCAS swaps, the validation revealed — and each fix peeled back — a **cascade of latent FOCAS-config-vs-driver mismatches** the stale binary had masked. Final state, all live on wonder:
|
||||
- `DriverHost …: recovered Applied state … → spawned Focas driver focas-z-34184 (stub=false) → DriverInstance focas-z-34184: connected → subscribed to 2 refs` — **two ESTABLISHED TCP sessions to `10.201.31.5:8193`**.
|
||||
- **`read ns=2;s=EQ-3686c0272279/parts-count` → Value 0, Status `0x00000000` (Good)**; `parts-required` → Good. (Value 0 is correct on the idle machine — status, not magnitude.) The original `0x80320000` is gone.
|
||||
|
||||
**The complete fix chain (all on `fix/focas-poll-io-serialization`, deployed):**
|
||||
1. **FOCAS I/O serialization + read timeout** (`SynchronizedFocasClient`) — the original diagnosed root cause: prevents the poll read hanging on the shared socket.
|
||||
2. **`RestoreApplied`-on-bootstrap** — already in source since `b1b3f3ff`; the wonder fix was deploying the current Host over the stale June-16 binary so the driver re-spawns on restart.
|
||||
3. **Akka→Serilog bridge** (`ConfigureLoggers().AddLogger<SerilogLogger>()` + set static `Serilog.Log.Logger` in Program.cs) — made the driver-host actor observable; this is what surfaced the next two issues.
|
||||
4. **`FlexibleStringConverter`** on the FOCAS config `Series` — the AdminUI persists the enum as a number (`"series":6`); the factory now tolerates number-or-string instead of throwing → stub.
|
||||
5. **Scheme-less host tolerance** in `FocasHostAddress.TryParse` — the AdminUI persists `hostAddress` as a bare `ip:port`; `TryParse` now accepts it (canonical `focas://` unchanged) instead of failing init.
|
||||
- FOCAS test suite **247 green**; each fix carries a regression test.
|
||||
- **Follow-up (product quality):** the AdminUI authors FOCAS configs (`series` as number, `hostAddress` without `focas://`) that the driver only now tolerates — the AdminUI↔driver config-format mismatch is worth reconciling at the source. Also: the shared `AddZbSerilog` not setting static `Serilog.Log.Logger` is a latent gap across all 3 apps. The FixedTree-under-Equipment feature (task #14) is now **BUILT** (offline-complete; see the 2026-06-26 design + implementation-plan docs above) — live wonder validation pending.
|
||||
|
||||
## Phase 2 — Get OtOpcUa runtime logs on wonder
|
||||
Make the Host emit driver-level logs so the data plane is observable. Options (least invasive first): point the service at a Serilog file sink via config/env, or temporarily run with `DOTNET_ENVIRONMENT=Development` (file sink + dev errors — cf. MxGateway note), or add a console capture. Preserve `appsettings*`/`data\`; restore the env after. Then read: did `InitializeAsync` start the FixedTree loop, does the bootstrap throw (and on which call), is `ReadAsync` invoked for the equipment tags, what does it return.
|
||||
- **Access:** servecli `:2222`, key `~/.ssh/servecli_wonder`, `scratchpad/wonder-ps.sh` (base64 PS over the cmd PTY), SFTP root `C:\Users\dohertj2\Desktop\win64`.
|
||||
- **Output:** the actual runtime behavior of the data poll + equipment read on the box.
|
||||
|
||||
## Phase 3 — Local reproduction in docker-dev (isolate FOCAS-specific vs general)
|
||||
Reproduce off the production box: in the docker-dev OtOpcUa, configure an equipment tag bound to a driver and check whether values flow at all. Use a driver with an easy local source (e.g. Modbus against a local sim, or the FOCAS wire client against a v1 responder if one can be stood up). If equipment-tag values flow for another driver locally but not FOCAS → FOCAS-specific; if they don't flow for any → a general equipment-projection/data-plane gap. A local repro gives a full-logging debug loop.
|
||||
- **Output:** scope (FOCAS-only vs general) + a local failing case to fix against.
|
||||
|
||||
## Phase 4 — Root cause + minimal fix
|
||||
From Phases 1–3, fix the smallest thing that makes the equipment tag carry a value and (if H2 is not by-design) the FixedTree surface. Likely shapes: a swallowed bootstrap exception; a seed/poll-group wiring gap; an Equipment-projection that should include FixedTree/driver auto-nodes; or a data-plane gate that needs config on the wonder node.
|
||||
|
||||
## Phase 5 — Validate
|
||||
- Local (docker-dev) green where reproduced; unit/integration tests for the fixed path.
|
||||
- Live: re-deploy to wonder, then via the OtOpcUa CLI confirm `parts-count`/`parts-required` read **Good** (value 0 is correct on this idle machine — assert status, not magnitude) and, if applicable, FixedTree Identity/Axes nodes appear with live values. The live Makino helps for FixedTree but is NOT blocking for the value-flow plumbing (macro values are 0 regardless), so this is lower time-pressure than the v3 capture was.
|
||||
|
||||
## Phase 6 — Docs + commit
|
||||
Update this plan's status + the deployment doc; commit on a branch (separate from `feat/focas-pdu-v3` if the fix is OtOpcUa-core rather than FOCAS-driver). Push per the repo flow when asked.
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
- **Execution approach:** this is a *debugging* investigation (unknown root cause) — diagnose before fixing; reproduce before claiming a fix; change the smallest thing. Don't deploy a guessed fix to the production CNC node.
|
||||
- **Key node IDs / endpoints:** equipment `ns=2;s=EQ-3686c0272279`; tags `…/parts-count` (`MACRO:3901`), `…/parts-required` (`MACRO:3902`); OPC `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa`; AdminUI `http://wonder-app-vd03.zmr.zimmer.com:9000` (DisableLogin); deploy = AdminUI `/deployments` → "Deploy current configuration" (Blazor→Akka; no headless API).
|
||||
- **Reusable tools:** `scratchpad/focas-status/` (live IFocasClient harness), `scratchpad/wonder-ps.sh`, `scratchpad/deploy-focas-v3.ps1`; the OtOpcUa CLI `src/Client/ZB.MOM.WW.OtOpcUa.Client.CLI` (`read`/`subscribe`/`browse --recursive`).
|
||||
@@ -0,0 +1,110 @@
|
||||
# FixedTree → Equipment injection — RESUME / work-left handoff
|
||||
|
||||
**Date:** 2026-06-26
|
||||
**Purpose:** survive a context compaction; let a fresh session continue without re-deriving state.
|
||||
|
||||
---
|
||||
|
||||
## TL;DR
|
||||
|
||||
The **FixedTree-under-Equipment dynamic-injection feature is BUILT, offline-complete, AND
|
||||
✅ LIVE-VALIDATED on wonder (2026-06-26)** — 11 tasks, all reviewed, full offline suite green, final
|
||||
integration review = ready to merge, and the real OPC injection confirmed on `wonder-app-vd03` (57 nodes
|
||||
grafted under `EQ-3686c0272279`, all reading Good live values). It lives on a **local, unpushed** branch.
|
||||
The only substantive thing left is the user's decision on push/PR/merge (§1). A few documented non-blocking
|
||||
follow-ups remain (§3).
|
||||
|
||||
## Git state (exact)
|
||||
|
||||
- **Branch:** `feat/focas-fixedtree-equipment-injection` (in the main working dir `/Users/dohertj2/Desktop/OtOpcUa`, NOT a worktree).
|
||||
- **Base:** branched off `fix/focas-poll-io-serialization` (the symptom-#1 data-plane fix — itself ahead of `master`, pushed to gitea with its own open PR, NOT merged). So this feature **stacks on an unmerged branch**.
|
||||
- **Commits:** 14, range `da55c69`..`37cac5de` (10 task commits + 4 review-fix/docs commits). All **local — nothing pushed.**
|
||||
- **User decision (2026-06-26):** finishing-a-development-branch → **"Keep as-is."** Do NOT push/merge/discard without an explicit new go-ahead. Standing rule: **commit/push only when asked.**
|
||||
- **Untouched pre-existing working-tree edits** (leave alone; never stage): `CLAUDE.md`, `docker-dev/docker-compose.yml`, `pending.md`, `stillpending.md`, `docs/plans/2026-06-19-followups-batch.md.tasks.json`.
|
||||
- This RESUME doc itself is currently **uncommitted** (a working artifact).
|
||||
|
||||
## What the feature does
|
||||
|
||||
Generic post-connect `ITagDiscovery` injection (NOT FOCAS-special-cased). On driver Connect:
|
||||
`DriverInstanceActor` runs bounded re-discovery (Timers single-tick, generation-guarded, stop-on-stable +
|
||||
attempt cap, re-kicks on reconnect) into a capturing `IAddressSpaceBuilder` → ships `DiscoveredNodesReady`
|
||||
→ `DriverHostActor` resolves the equipment via authored `EquipmentTags`, maps the nodes under
|
||||
`EQ-…/FOCAS/…` (read-only; single device-host folder collapsed) via `DiscoveredNodeMapper`, extends
|
||||
`_nodeIdByDriverRef`, caches the plan, Tells `OpcUaPublishActor.MaterialiseDiscoveredNodes` →
|
||||
`AddressSpaceApplier` → sink `EnsureFolder`/`EnsureVariable` + `RaiseNodesAddedModelChange` (NodeAdded), and
|
||||
re-sends `SetDesiredSubscriptions(authored ∪ FixedTree refs)` so values flow through the existing
|
||||
poll→push path. Survives redeploys (re-applied at the tail of `PushDesiredSubscriptions` from the cache)
|
||||
and restarts (re-discovered on reconnect).
|
||||
|
||||
## Verification (offline) — all green as of 2026-06-26
|
||||
|
||||
- `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → **0 errors, 0 warnings** (TreatWarningsAsErrors on).
|
||||
- `dotnet test … --filter "FullyQualifiedName~Runtime.Tests"` → **312 passed**.
|
||||
- `dotnet test … --filter "FullyQualifiedName~OpcUaServer.Tests"` → **304 passed**.
|
||||
- `dotnet test … --filter "FullyQualifiedName~FOCAS"` → **324 passed, 10 skipped** (the skips are live-wire integration tests needing the physical CNC — expected).
|
||||
- Final integration review: **ready to merge** (3 non-blocking Minors — see Follow-ups).
|
||||
- Known env limitation (not a failure): the net48 `Driver.Historian.Wonderware.Tests` can't run its testhost on macOS — run the **filtered** suites above, not a full-solution `dotnet test`.
|
||||
|
||||
## Key files / anchors
|
||||
|
||||
- Design: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (status = Implemented; has the follow-ups).
|
||||
- Plan + task journal: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection.md` (+ `.md.tasks.json`, all tasks completed).
|
||||
- Investigation plan (symptom #2 marked BUILT): `docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md`.
|
||||
- Deployment doc (FixedTree section added): `docs/deployments/wonder-app-vd03-makino-z-34184.md`.
|
||||
- New code:
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs`, `CapturingAddressSpaceBuilder.cs`, `DiscoveredNodeMapper.cs`
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs` (DTOs)
|
||||
- modified: `DriverInstanceActor.cs`, `DriverHostActor.cs`, `OpcUaPublishActor.cs`, `AddressSpaceApplier.cs`, `OtOpcUaNodeManager.cs`, `IOpcUaAddressSpaceSink.cs` (+ `SdkAddressSpaceSink.cs`, `DeferredAddressSpaceSink.cs`)
|
||||
- tests: `tests/Server/…Runtime.Tests/Drivers/{CapturingAddressSpaceBuilderTests,DiscoveredNodeMapperTests,DriverInstanceActorDiscoveryTests,DriverHostActorDiscoveryTests,DiscoveryInjectionEndToEndTests}.cs`, `…OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs`, edits to `AddressSpaceApplierTests.cs`/`OpcUaPublishActorTests.cs`.
|
||||
- Memory: `…/memory/wonder-otopcua-focas-and-akka-roles.md` (RESUME-ANCHOR bullet updated to record this feature; read it for the broader wonder/FOCAS context + box-access recipe).
|
||||
|
||||
## WORK LEFT (prioritized)
|
||||
|
||||
### 1. Decide the git endgame (user-gated)
|
||||
Pick one, only on explicit user go-ahead:
|
||||
- **Push + PR** — `git push -u origin feat/focas-fixedtree-equipment-injection`; PR base is `fix/focas-poll-io-serialization` (stacked) or `master` (will show both features' commits). gitea repo: `lmxopcua`.
|
||||
- **Merge locally** into `fix/focas-poll-io-serialization` (folds both features onto one branch/PR).
|
||||
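- **Keep as-is** (current state) — live validation is already done (§2), so the branch simply stays local and unpushed until the user gives an explicit go-ahead.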
|
||||
|
||||
### 2. Live wonder validation — ✅ DONE 2026-06-26
|
||||
**Validated live on `wonder-app-vd03`.** Built a full self-contained Host overlay from this branch @
|
||||
`37cac5de`, deployed to `E:\ApiInstall\OtOpcUa` (stop → backup `E:\ApiInstall\OtOpcUa_bak-20260626111416`
|
||||
→ robocopy overlay preserving `appsettings*.json` + `pki\` → restart). Baseline before deploy: only
|
||||
`parts-count`/`parts-required` under `EQ-3686c0272279`. After deploy + FOCAS reconnect: the host log
|
||||
recorded `injected 57 discovered node(s) … under EQ-3686c0272279` / `materialised … (folders=14, vars=57)`,
|
||||
no exceptions. CLI browse showed the full `FOCAS/` subtree (Identity/Axes X-Y-Z-B-C-AA+Actual/Spindle/
|
||||
Program/OperationMode/Timers), idempotent across repeats, device-host folder collapsed. Sample reads all
|
||||
Good: `Identity/SeriesNumber=G431`, `CncType=31`, `AxisCount=7`, `Axes/X/AbsolutePosition=2801574` (live),
|
||||
`OperationMode/ModeText=TJOG`; authored tags still Good (no regression). `/healthz` 200 Healthy throughout.
|
||||
Result recorded in `docs/deployments/wonder-app-vd03-makino-z-34184.md`. **The substantive remaining work
|
||||
is now the git endgame (§1) only.** Original recipe retained below for reference:
|
||||
|
||||
The offline e2e asserts the recording-sink contract, NOT the real `OtOpcUaNodeManager` seed→overwrite at
|
||||
the OPC node layer. Live validation closes that gap. Recipe (mirrors the symptom-#1 deploy):
|
||||
1. Build the current Host self-contained: `dotnet publish src/…/ZB.MOM.WW.OtOpcUa.Host…csproj -c Release -r win-x64 --self-contained true -p:PublishSingleFile=false`. **Must be a full self-contained publish-overlay, NOT a DLL swap** — the box is self-contained (DLL swaps crashed: FileNotFound / "Could not resolve CoreCLR path"). Note: deploying the current Host already happened for symptom #1; if the box is at the symptom-#1 build, this feature's DLLs (Runtime + OpcUaServer + Commons + the new Runtime/Drivers files) must be included in the overlay — so a fresh full overlay from THIS branch is the safe path.
|
||||
2. Box access: servecli `:2222`, key `~/.ssh/servecli_wonder`, user `dohertj2`; drive via `scratchpad/wonder-ps.sh` (base64 PS over cmd PTY); SFTP root `C:\Users\dohertj2\Desktop\win64`. Service `OtOpcUaHost`. Overlay onto `E:\ApiInstall\OtOpcUa` **preserving `pki\` + `appsettings*.json` + `data\`**; back up first; auto-rollback if unhealthy.
|
||||
3. Restart `OtOpcUaHost`; confirm member Up w/ ADMIN+DRIVER (roles env already set), `/healthz` Healthy, OPC `:4840` listening.
|
||||
4. The FOCAS driver connects → ~0–2 s later FixedTree populates → injection fires. Validate via the OtOpcUa CLI (`src/Client/…Client.CLI`) against `opc.tcp://wonder-app-vd03.zmr.zimmer.com:4840/OtOpcUa` (Security None, anonymous):
|
||||
- `browse --recursive` → expect a `FOCAS` subfolder under `ns=2;s=EQ-3686c0272279` with `Identity/`, `Axes/`, etc.
|
||||
- `read ns=2;s=EQ-3686c0272279/FOCAS/Identity/SeriesNumber` → expect Good (a real string).
|
||||
- `read ns=2;s=EQ-3686c0272279/FOCAS/Axes/X/AbsolutePosition` → expect Good (value may be 0 on idle machine — assert STATUS, not magnitude).
|
||||
- The authored `parts-count`/`parts-required` should remain Good (symptom #1 fix).
|
||||
5. If a value reads Bad, the symptom-#1 self-healing applies (recoverable `BadCommunicationError`, observable in Serilog at `C:\Windows\System32\logs\otopcua-<date>.log`). The Akka→Serilog bridge (from symptom #1) makes `DriverHost`/`DriverInstance`/discovery logs visible.
|
||||
|
||||
### 3. Non-blocking follow-ups
|
||||
**✅ ALL FIXEDTREE FOLLOW-UPS (A–E) IMPLEMENTED 2026-06-26** — design+plan
|
||||
`2026-06-26-otopcua-fixedtree-followups{-design,}.md`; 16 commits `c2c368dc`..`0074f37a` on this branch
|
||||
(every task spec+code reviewed; offline suites green). Resolved:
|
||||
- ✅ Config-unchanged rebind now re-triggers discovery (`TriggerRediscovery`) — follow-up C.
|
||||
- ✅ Multi-device-per-driver implemented via `EquipmentNode.DeviceHost` partition; ≥1-authored-tag requirement lifted (driver-binding resolution) — follow-up E (projection-only, no migration / no artifact wire change).
|
||||
- ✅ Per-(re)connect re-discovery policy-gated (`ITagDiscovery.RediscoverPolicy` UntilStable/Once/Never; synchronous drivers → Once) — follow-up B.
|
||||
- ✅ Double `SetDesiredSubscriptions` per redeploy de-duped (one send per driver) — follow-up D.
|
||||
- ✅ Per-pass `DiscoverAsync` timeout made injectable — follow-up A.
|
||||
|
||||
**Still open (out of scope for the FixedTree follow-ups — separate cross-cutting work):**
|
||||
- Cross-cutting (from symptom #1, all 3 apps): shared `AddZbSerilog` doesn't set the static `Serilog.Log.Logger`; AdminUI persists FOCAS config in formats (series-as-number, scheme-less host) the driver only now tolerates — reconcile at the AdminUI source.
|
||||
|
||||
## Context that's easy to lose
|
||||
- 3 real defects were caught + fixed by the review chain during the build: `DriverDataType.ToString()` ≠ OPC type string (`Float64`→`"Double"`); `Server.ReportEvent` under the node `Lock` (deadlock); `ConfigureAwait(false)` in the discovery handler (off-actor-context crash for async drivers like Galaxy sharing the node). All have regression tests.
|
||||
- The plan's Task-3 instruction "keep ReportEvent inside lock" was itself a defect; the plan doc was corrected.
|
||||
- The execution used subagent-driven-development (fresh implementer per task + spec/code reviews; high-risk tasks got Opus reviews, serial). Single-writer discipline was enforced (no concurrent `dotnet` builds → no obj/bin or git-index races).
|
||||
@@ -0,0 +1,205 @@
|
||||
# OtOpcUa — dynamic injection of driver-discovered FixedTree nodes into the Equipment projection (design)
|
||||
|
||||
**Date:** 2026-06-26
|
||||
**Status:** ✅ Implemented (2026-06-26) — 11 tasks, offline-complete on branch `feat/focas-fixedtree-equipment-injection` (solution build 0 errors / 0 warnings; Runtime.Tests 312, OpcUaServer.Tests 304, FOCAS 247 + an end-to-end injection+value-flow test, all green). Live wonder validation ✅ done 2026-06-26 (see the deployment record).
|
||||
|
||||
**Follow-ups surfaced during the review chain — ✅ ALL RESOLVED 2026-06-26** (design
|
||||
[`2026-06-26-otopcua-fixedtree-followups-design.md`](2026-06-26-otopcua-fixedtree-followups-design.md),
|
||||
plan [`2026-06-26-otopcua-fixedtree-followups.md`](2026-06-26-otopcua-fixedtree-followups.md);
|
||||
16 commits `c2c368dc`..`0074f37a` on this branch, every task spec+code reviewed; full offline suite green):
|
||||
- ✅ Config-unchanged driver→equipment **rebind** now **re-triggers discovery** (follow-up C): the redeploy re-inject tail drops the stale plan AND `Tell`s the driver child a new `DriverInstanceActor.TriggerRediscovery` (a discovery action — not lifecycle control — idempotent, child no-ops if not Connected), so the FixedTree re-grafts under the new equipment on the next pass instead of waiting for the next natural reconnect.
|
||||
- ✅ **Multi-device-per-driver** mapping **implemented** (follow-up E): `EquipmentNode` now carries `DriverInstanceId`/`DeviceId`/`DeviceHost` (projection-only — the columns + the `Devices` array were already in the artifact, no DB migration / no wire change), so equipment resolves via the driver binding **without** authored tags (≥1-tag requirement lifted), and a driver bound to multiple devices partitions its discovered tree by normalized device-host folder, grafting each device's subtree under the equipment whose `DeviceHost` matches (unmatched hosts warn-skip, never mis-graft).
|
||||
- ✅ Per-(re)connect re-discovery is now **policy-gated** (follow-up B): `ITagDiscovery.RediscoverPolicy` (`UntilStable`/`Once`/`Never`, default `UntilStable`) — FOCAS stays `UntilStable` (its FixedTree cache fills asynchronously after connect); the synchronous-discovery drivers (OpcUaClient/TwinCAT/AbCip/AbLegacy/Modbus/S7/Galaxy) are `Once`, dropping the wasteful 15× retry. The hardcoded 30 s per-pass discovery timeout is now injectable too (follow-up A).
|
||||
- ✅ The OPC-node-layer seed→serve gap (recording-sink-only e2e) was closed by the **live wonder deploy** of the base feature (validated 2026-06-26; see the deployment record).
|
||||
**Companion to:** [`2026-06-25-otopcua-equipment-dataplane-investigation.md`](2026-06-25-otopcua-equipment-dataplane-investigation.md) (symptom #1 — live FOCAS values — FIXED + deployed; this design addresses **symptom #2**).
|
||||
**Base branch:** `fix/focas-poll-io-serialization` (this feature builds on the now-deployed driver-host bootstrap re-spawn + FOCAS I/O fixes; that branch is ahead of `master` and not yet merged).
|
||||
|
||||
---
|
||||
|
||||
## Problem
|
||||
|
||||
Deployed FOCAS equipment serves only its **authored** config tags (`parts-count`/`parts-required`). The driver's
|
||||
**FixedTree** (Identity / Axes / Spindle / Program / Timers — the auto-discovered CNC structure) **never appears** under
|
||||
the served Equipment/UNS address space.
|
||||
|
||||
**Root cause (confirmed in the investigation, H2):** the served Equipment tree is built **purely from Config-DB entities**
|
||||
(`AddressSpaceComposer.Compose` → `AddressSpaceApplier` → node manager). The only code that emits FixedTree nodes is
|
||||
`ITagDiscovery.DiscoverAsync` (each driver implements it), reachable **only** through `GenericDriverNodeManager.BuildAddressSpaceAsync`
|
||||
— which has **no runtime caller** (its referenced host method `OpcUaApplicationHost.PopulateAddressSpaces` no longer exists).
|
||||
So `DiscoverAsync`/`ITagDiscovery` is **dead for serving**: every served node is config-driven, and nothing surfaces a
|
||||
driver's discovered hierarchy.
|
||||
|
||||
Surfacing FixedTree under the Equipment node is therefore a **new dynamic-node-injection capability**, and it must solve a
|
||||
**timing problem**: composition runs at deploy/apply time (before the driver connects), but the FixedTree shape
|
||||
(axis count, spindle presence, which sections exist) is **capability-discovered ~0–2 s after the driver connects**
|
||||
(`FocasDriver` populates `state.FixedTreeCache` in its bootstrap loop).
|
||||
|
||||
## Goal
|
||||
|
||||
After a driver connects, dynamically graft its discovered FixedTree nodes into the served Equipment projection under a
|
||||
driver-named subfolder, e.g.:
|
||||
|
||||
```
|
||||
ns=2;s=EQ-3686c0272279 (equipment "z-34184")
|
||||
├── parts-count (authored config tag — unchanged)
|
||||
├── parts-required (authored config tag — unchanged)
|
||||
└── FOCAS (NEW — driver-named discovered subfolder)
|
||||
├── Identity/{SeriesNumber, Version, MaxAxes, CncType, MtType, AxisCount}
|
||||
├── Axes/{<axis>/{AbsolutePosition, MachinePosition, RelativePosition, DistanceToGo}, FeedRate/Actual, SpindleSpeed/Actual}
|
||||
├── Spindle/{<name>/{Load, MaxRpm}} (capability-gated)
|
||||
├── Program/{Name, ONumber, Number, MainNumber, Sequence, BlockCount} (capability-gated)
|
||||
├── OperationMode/{Mode, ModeText} (capability-gated)
|
||||
└── Timers/{PowerOnSeconds, OperatingSeconds, CuttingSeconds, CycleSeconds} (capability-gated)
|
||||
```
|
||||
|
||||
Read-only value nodes carrying live values (e.g. `EQ-…/FOCAS/Axes/X/AbsolutePosition` reads Good).
|
||||
|
||||
## Decisions (locked with the user 2026-06-26)
|
||||
|
||||
| Decision | Choice |
|
||||
|---|---|
|
||||
| Driver scope | **Generic** — keyed off the shared `ITagDiscovery` interface (FOCAS, Galaxy, Modbus all implement it). FOCAS is the first/test consumer; others get it for free. **Zero per-driver code changes.** |
|
||||
| Tree placement | **Under a driver-named subfolder** — `EQ-…/FOCAS/…` (collision-safe vs. authored tags; self-describing). |
|
||||
| Device-host folder | **Collapse** the single device-host level → `EQ-…/FOCAS/Identity/…` (not `EQ-…/FOCAS/10.201.31.5:8193/Identity/…`), valid because today's deployment is strictly 1:1 driver↔equipment↔device. |
|
||||
| Model-change notification | **Emit `GeneralModelChangeEvent`** after a runtime add so already-connected OPC UA clients can refresh their browse. |
|
||||
| Multi-device-per-driver | **Deferred** at base-feature time; ✅ **implemented as follow-up E** (2026-06-26) — `EquipmentNode.DeviceHost` partition. |
|
||||
| Discovered alarms | **Out of scope** — this feature surfaces value nodes only; alarms continue to come via the config path. |
|
||||
| Writable discovered nodes | **Out of scope** — FixedTree is read-only CNC state. |
|
||||
|
||||
## Approach (chosen): runtime post-connect injection via the actor pipeline
|
||||
|
||||
Treat discovered FixedTree nodes as **"synthetic equipment tags" injected at runtime**, reusing the existing
|
||||
materialize → subscribe → poll → push pipeline end-to-end. Only three new pieces; **no driver changes** (each driver's
|
||||
existing `DiscoverAsync` is reused verbatim via a capturing builder).
|
||||
|
||||
**Rejected alternatives:**
|
||||
- *Composition-time pre-projection* — can't author the right nodes before the driver discovers capabilities; defeats the purpose.
|
||||
- *Resurrect `GenericDriverNodeManager` as a 2nd namespace (ns=3)* — puts FixedTree in a separate tree (not **under** the equipment node), and that namespace's value-routing is also dead; more dead code to revive, wrong location.
|
||||
- *Cheap baseline: author a Config-DB Tag row per FixedTree signal* — no new code, but static (can't adapt to per-CNC capabilities) and per-signal × per-machine manual authoring. User chose to build the dynamic feature instead.
|
||||
|
||||
## Components
|
||||
|
||||
### 1. `CapturingAddressSpaceBuilder` (new — runtime)
|
||||
An `IAddressSpaceBuilder` implementation that **records** the streamed tree instead of creating OPC UA nodes. After a
|
||||
driver's `DiscoverAsync(builder)` returns, it exposes a flat `IReadOnlyList<DiscoveredNode>`:
|
||||
|
||||
```
|
||||
DiscoveredNode {
|
||||
IReadOnlyList<string> FolderPathSegments, // e.g. ["FOCAS", "<deviceHost>", "Identity"]
|
||||
string BrowseName, string DisplayName,
|
||||
string FullReference, // == DriverAttributeInfo.FullName (the driver ref + routing key)
|
||||
DriverDataType DataType, bool IsArray, uint? ArrayDim,
|
||||
bool Writable, bool IsHistorized
|
||||
}
|
||||
```
|
||||
|
||||
- `Folder(browse, display)` returns a child capturing scope; `Variable(...)` records a node and returns an
|
||||
`IVariableHandle` whose `FullReference` is `DriverAttributeInfo.FullName`.
|
||||
- `MarkAsAlarmCondition(...)` returns a **no-op** sink; `AddProperty(...)` is **ignored** — value nodes only.
|
||||
|
||||
### 2. `DriverInstanceActor` — post-connect discovery (bounded retry)
|
||||
On entering `Connected`, kick a bounded re-discovery:
|
||||
1. Run `DiscoverAsync(capturingBuilder)` against the live `IDriver` it owns.
|
||||
2. `Tell` the parent `DriverHostActor` a new message `DiscoveredNodesReady(DriverInstanceId, IReadOnlyList<DiscoveredNode>)`.
|
||||
3. Because FOCAS suppresses FixedTree until `FixedTreeCache` populates (~0–2 s), **retry** every ~2 s up to a cap
|
||||
(~30 s) **or until the captured set stops growing**, then stop. `DiscoverAsync` reads the in-memory cache (no extra
|
||||
wire I/O), so retries are cheap. Re-runs on every reconnect (downstream is idempotent).
|
||||
|
||||
*(Drivers whose discovery is ready immediately — e.g. Galaxy/Modbus — satisfy this on the first attempt.)*
|
||||
|
||||
### 3. `DriverHostActor` — injection handler
|
||||
On `DiscoveredNodesReady(id, nodes)`:
|
||||
1. Find the equipment bound to the driver instance: `composition.EquipmentNodes` where `DriverInstanceId == id`.
|
||||
- 0 matches → log Info, skip. >1 match → log Warning, skip (multi-device follow-up).
|
||||
2. **Dedup** discovered `FullReference`s against authored `EquipmentTags` for that driver (never double-create
|
||||
`parts-count`, etc.).
|
||||
3. Map each remaining node to a NodeId `EQ-…/FOCAS/<collapsed-path>/<name>` via `EquipmentNodeIds.Variable(...)`
|
||||
(collapse the single device-host folder level).
|
||||
4. **Cache** the mapped result in `_discoveredByDriver[id]` (survives redeploys — see Lifecycle).
|
||||
5. Update `_nodeIdByDriverRef[(id, FullReference)]` for each.
|
||||
6. `Tell` `OpcUaPublishActor` a new `MaterialiseDiscoveredNodes(equipmentId, "FOCAS", nodes)`.
|
||||
7. Merge the new refs into the driver's desired set and re-`Tell`
|
||||
`DriverInstanceActor.SetDesiredSubscriptions(union, interval, alarmRefs)` — the existing **live path** immediately
|
||||
re-subscribes (the actor self-`Tell`s `Subscribe` when already `Connected`).
|
||||
|
||||
### 4. `OpcUaPublishActor` / node manager — incremental materialize
|
||||
New message `MaterialiseDiscoveredNodes(equipmentId, driverSubfolder, nodes)`:
|
||||
- Idempotent `EnsureFolder` / `EnsureVariable` calls (the node manager already supports incremental add under `Lock`
|
||||
via `AddChild` + `AddPredefinedNode`; `EnsureVariable` early-returns if the node exists).
|
||||
- Variables materialize **read-only** (no `OnWriteValue`).
|
||||
- After adding, emit a `GeneralModelChangeEvent` so connected clients can refresh their browse (the full-rebuild path
|
||||
does not emit one; runtime adds should).
|
||||
|
||||
## Data flow (value path — fully reused)
|
||||
|
||||
```
|
||||
SetDesiredSubscriptions(union) → DriverInstanceActor subscribes the FixedTree refs
|
||||
→ PollGroupEngine polls each ref via FocasDriver.ReadAsync
|
||||
→ TryReadFixedTree (cache lookup, NO extra wire I/O)
|
||||
→ onChange → AttributeValuePublished(FullReference)
|
||||
→ DriverHostActor.ForwardToMux
|
||||
→ _nodeIdByDriverRef[(driverId, ref)] → AttributeValueUpdate(nodeId, value, quality, ts)
|
||||
→ OtOpcUaNodeManager writes the node value
|
||||
```
|
||||
|
||||
The routing key is **consistent by construction**: the capturing builder records `handle.FullReference`, which is exactly
|
||||
the ref the driver publishes (`AttributeValuePublished.FullReference`) and the ref `TryReadFixedTree` matches
|
||||
(`reference.StartsWith(state.Options.HostAddress + "/")`).
|
||||
|
||||
## Lifecycle / re-injection robustness (the timing problem, solved)
|
||||
|
||||
- **First connect:** driver connects → ~0–2 s later `FixedTreeCache` populates → bounded re-discovery catches it → inject.
|
||||
- **Redeploy with a structural `RebuildAddressSpace`:** the full teardown wipes injected nodes and `PushDesiredSubscriptions`
|
||||
rebuilds `_nodeIdByDriverRef` from authored tags only. **Fix:** after every `PushDesiredSubscriptions`, `DriverHostActor`
|
||||
**re-applies its cached `_discoveredByDriver`** (re-materialize + re-map + re-merge refs) — so FixedTree survives
|
||||
redeploys without re-querying the driver.
|
||||
- **Process restart:** `_discoveredByDriver` is lost, but `RestoreApplied` re-spawns drivers → each reconnects →
|
||||
post-connect re-discovery re-injects (same ~0–2 s delay). Consistent with the symptom-#1 restore behavior already
|
||||
deployed.
|
||||
- **Idempotent throughout:** `EnsureFolder`/`EnsureVariable` early-return if present; `_nodeIdByDriverRef` is set-based;
|
||||
`SetDesiredSubscriptions` is idempotent.
|
||||
|
||||
## Error handling
|
||||
|
||||
- Discovery throws / driver not ready → bounded retry, then give up quietly (Info); authored tags unaffected.
|
||||
- No equipment bound to the driver instance → Info, skip injection; ambiguous (multi-equipment) binding → Warning, skip injection.
|
||||
- A FixedTree ref that fails to read at poll time → flows the same recoverable `BadCommunicationError` push as any
|
||||
equipment tag (the symptom-#1 fix) — observable, not silent.
|
||||
|
||||
## Testing
|
||||
|
||||
- **Unit:**
|
||||
- `CapturingAddressSpaceBuilder` records the tree + refs from a fake `ITagDiscovery` (folders, nested variables,
|
||||
no-op alarm sink, ignored properties).
|
||||
- Injector mapping: discovered nodes → `EQ-…/FOCAS/…` NodeIds; dedup against authored tags; device-host-folder collapse.
|
||||
- `DriverInstanceActor` bounded post-connect re-discovery (set becomes non-empty on the Nth attempt; stops on cap / no-growth).
|
||||
- `DriverHostActor` `DiscoveredNodesReady` handling + re-inject-after-`PushDesiredSubscriptions`.
|
||||
- Read-only materialization (no write callback).
|
||||
- **Integration (docker-dev):** a fake `ITagDiscovery` driver exposing a *delayed* discovery set → assert nodes appear
|
||||
under the equipment and carry values; verify survival across a redeploy + a process restart.
|
||||
- **Live (wonder, following the symptom-#1 pattern):** deploy the current Host + this change, browse
|
||||
`EQ-3686c0272279/FOCAS/Identity/SeriesNumber` and `…/Axes/X/AbsolutePosition`, confirm Good values. The live deploy is
|
||||
**not** blocking for the build (macro/axes values may be 0 on the idle machine — assert status, not magnitude); confirm
|
||||
the live-deploy step with the user at execution time.
|
||||
|
||||
## Scope / non-goals
|
||||
|
||||
- **In:** read-only value nodes for any `ITagDiscovery` driver; 1:1 driver↔equipment; survives redeploy/restart; generic
|
||||
mechanism with FOCAS as the first consumer.
|
||||
- **Out (documented follow-ups):** discovered **alarms** injection; multi-device-per-driver-instance mapping; writable
|
||||
discovered nodes.
|
||||
|
||||
## Touched code (anticipated)
|
||||
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` — `DiscoveredNodesReady` handler, `_discoveredByDriver`
|
||||
cache, re-inject after `PushDesiredSubscriptions`, desired-set merge.
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` — post-connect bounded re-discovery + new message.
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs` — `MaterialiseDiscoveredNodes` receive.
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` — `GeneralModelChangeEvent` emit on runtime add (verify
|
||||
existing helper).
|
||||
- New: `CapturingAddressSpaceBuilder` + `DiscoveredNode` DTO (runtime), `EquipmentNodeIds` reuse for mapping.
|
||||
- Tests under `tests/...Runtime.Tests` / `tests/...OpcUaServer.Tests` and a fake `ITagDiscovery` test double.
|
||||
|
||||
## Task tracking
|
||||
|
||||
Umbrella native task **#14** (FixedTree feature). Implementation tasks to be generated by writing-plans from this design.
|
||||
@@ -0,0 +1,759 @@
|
||||
# FixedTree → Equipment dynamic-injection Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** After an `ITagDiscovery` driver connects, dynamically graft its discovered FixedTree nodes into the served Equipment/UNS OPC UA address space under a driver-named subfolder (`EQ-…/FOCAS/…`), carrying live values — reusing the existing materialize → subscribe → poll → push pipeline.
|
||||
|
||||
**Architecture:** Treat discovered nodes as "synthetic equipment tags" injected at runtime. A capturing `IAddressSpaceBuilder` records each driver's `DiscoverAsync` output (zero driver changes); `DriverInstanceActor` runs discovery post-connect (bounded retry, since FOCAS's `FixedTreeCache` populates ~0–2 s after connect) and ships a `DiscoveredNodesReady` message; `DriverHostActor` maps the nodes under the equipment, extends `_nodeIdByDriverRef` + the desired-subscription set, and tells `OpcUaPublishActor` to incrementally materialize them (idempotent `EnsureFolder`/`EnsureVariable`, no full teardown), emitting a `GeneralModelChangeEvent`. Survives redeploys (re-applied after `PushDesiredSubscriptions`) and restarts (re-discovered on reconnect).
|
||||
|
||||
**Tech Stack:** .NET 10, Akka.NET (Akka.Hosting, Akka.TestKit.Xunit2), OPC UA (`OPCFoundation.NetStandard.Opc.Ua`), xUnit v2 + Shouldly.
|
||||
|
||||
**Design doc:** [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md). Base branch: `fix/focas-poll-io-serialization` (this builds on the deployed driver-host bootstrap re-spawn + FOCAS I/O fixes; not yet merged to `master`).
|
||||
|
||||
**Key code anchors (verified 2026-06-26):**
|
||||
- `IAddressSpaceBuilder` / `IVariableHandle` / `DriverAttributeInfo` — `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/`.
|
||||
- Reference capturing builder (flat collector): `src/Drivers/Cli/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.Cli/Commands/BrowseCommand.cs:120` (`CollectingAddressSpaceBuilder`).
|
||||
- NodeId scheme: `src/Core/ZB.MOM.WW.OtOpcUa.Commons/OpcUa/EquipmentNodeIds.cs` (`Variable(equipmentId, folderPath, name)` → `{parent}/{name}`; `SubFolder` → `{equipmentId}/{folderPath}`).
|
||||
- Materialize pattern: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs:248` (`MaterialiseEquipmentTags`) + `SafeEnsureFolder`/`SafeEnsureVariable`.
|
||||
- Node manager: `OtOpcUaNodeManager.EnsureFolder` (`:1282`), `EnsureVariable` (`:1367`, seeds `BadWaitingForInitialData`), `BuildNodeShapeChangedEvent` (`:1525`, verb `DataTypeChanged` — model for a `NodeAdded` sibling).
|
||||
- Publish actor receive + materialize calls: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs:217` (Receive block), `HandleRebuild` (`:275`).
|
||||
- Driver value route: `DriverHostActor.ForwardToMux` (`:525`), `_nodeIdByDriverRef` built in `PushDesiredSubscriptions` (`:1019`, sends `SetDesiredSubscriptions` `:1052`), `ChildEntry` (`:203`), Receive blocks (`:482`, `:512`).
|
||||
- Driver connect hook: `DriverInstanceActor` `_driver` field (`:110`), `Connected()` (`:317`), transition at `InitializeSucceeded` (`:278`); `SetDesiredSubscriptions` live re-subscribe path (`:340-353`).
|
||||
- FOCAS discovery (reused verbatim): `FocasDriver.DiscoverAsync` (`:408`) emits `FOCAS/{deviceHost}/<section>/…`; FixedTree leaf `FullName` = `{deviceHost}/{path}`; suppresses FixedTree until `FixedTreeCache` set.
|
||||
|
||||
---
|
||||
|
||||
## Conventions for every task
|
||||
|
||||
- **TDD:** write the failing test first, run it (confirm the expected failure), implement minimally, run again (green), commit.
|
||||
- **Build:** `dotnet build ZB.MOM.WW.OtOpcUa.slnx` from the repo root.
|
||||
- **Run a single test class:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~<ClassName>"`.
|
||||
- **Commits:** Conventional Commits, on `fix/focas-poll-io-serialization` (do NOT touch the pre-existing unrelated working-tree edits: `CLAUDE.md`, `docker-dev/docker-compose.yml`, `pending.md`, `stillpending.md`, `docs/plans/2026-06-19-followups-batch.md.tasks.json` — `git add` only this feature's files).
|
||||
- **No new dependencies, no proto change, no EF migration.** All edits are within existing projects.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: `DiscoveredNode` DTO + path-tracking `CapturingAddressSpaceBuilder`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 3
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs`
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/CapturingAddressSpaceBuilder.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/CapturingAddressSpaceBuilderTests.cs`
|
||||
|
||||
Unlike the CLI's flat `CollectingAddressSpaceBuilder`, this one **tracks folder nesting** so each variable records its full path segments (e.g. `["FOCAS","10.201.31.5:8193","Identity"]` + browse `SeriesNumber`).
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class CapturingAddressSpaceBuilderTests
|
||||
{
|
||||
[Fact]
|
||||
public void Records_nested_path_segments_full_reference_and_metadata()
|
||||
{
|
||||
var b = new CapturingAddressSpaceBuilder();
|
||||
var focas = b.Folder("FOCAS", "FOCAS");
|
||||
var device = focas.Folder("10.0.0.5:8193", "cnc");
|
||||
var identity = device.Folder("Identity", "Identity");
|
||||
identity.Variable("SeriesNumber", "SeriesNumber", new DriverAttributeInfo(
|
||||
FullName: "10.0.0.5:8193/Identity/SeriesNumber",
|
||||
DriverDataType: DriverDataType.String, IsArray: false, ArrayDim: null,
|
||||
SecurityClass: SecurityClassification.ViewOnly, IsHistorized: false));
|
||||
|
||||
b.Nodes.Count.ShouldBe(1);
|
||||
var n = b.Nodes[0];
|
||||
n.FolderPathSegments.ShouldBe(new[] { "FOCAS", "10.0.0.5:8193", "Identity" });
|
||||
n.BrowseName.ShouldBe("SeriesNumber");
|
||||
n.FullReference.ShouldBe("10.0.0.5:8193/Identity/SeriesNumber");
|
||||
n.DataType.ShouldBe(DriverDataType.String);
|
||||
n.Writable.ShouldBeFalse(); // ViewOnly → read-only
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void AddProperty_is_ignored_and_alarm_marking_is_a_noop_sink()
|
||||
{
|
||||
var b = new CapturingAddressSpaceBuilder();
|
||||
var f = b.Folder("FOCAS", "FOCAS");
|
||||
f.AddProperty("Manufacturer", DriverDataType.String, "FANUC"); // ignored, no throw
|
||||
var h = f.Variable("V", "V", new DriverAttributeInfo("ref", DriverDataType.Int32, false, null,
|
||||
SecurityClassification.ViewOnly, false, IsAlarm: true));
|
||||
var sink = h.MarkAsAlarmCondition(new AlarmConditionInfo("src", AlarmSeverity.Low, null));
|
||||
sink.ShouldNotBeNull(); // no-op sink, alarms out of scope
|
||||
b.Nodes.Count.ShouldBe(1);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails** — `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~CapturingAddressSpaceBuilderTests"` → FAIL (types don't exist).
|
||||
|
||||
**Step 3: Implement `DiscoveredNode.cs`**
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
|
||||
/// <summary>
|
||||
/// A flattened variable captured from a driver's <see cref="ITagDiscovery.DiscoverAsync"/> stream
|
||||
/// by <see cref="CapturingAddressSpaceBuilder"/>. Folder nesting is preserved in
|
||||
/// <see cref="FolderPathSegments"/> so the injector can re-root the node under an equipment.
|
||||
/// </summary>
|
||||
public sealed record DiscoveredNode(
|
||||
IReadOnlyList<string> FolderPathSegments,
|
||||
string BrowseName,
|
||||
string DisplayName,
|
||||
string FullReference,
|
||||
DriverDataType DataType,
|
||||
bool IsArray,
|
||||
uint? ArrayDim,
|
||||
bool Writable,
|
||||
bool IsHistorized);
|
||||
```
|
||||
|
||||
**Step 3b: Implement `CapturingAddressSpaceBuilder.cs`**
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
|
||||
/// <summary>
|
||||
/// An <see cref="IAddressSpaceBuilder"/> that RECORDS the streamed tree instead of creating OPC UA
|
||||
/// nodes — used to capture an <see cref="ITagDiscovery"/> driver's discovered hierarchy so the
|
||||
/// runtime can graft it under an equipment node. Folder nesting is tracked (each child builder
|
||||
/// carries its accumulated path), so every variable records its full <see cref="DiscoveredNode.FolderPathSegments"/>.
|
||||
/// <para>Value nodes only: <see cref="AddProperty"/> is ignored and alarm marking returns a no-op sink
|
||||
/// (discovered alarms are out of scope — alarms come via the config path).</para>
|
||||
/// <para>Single-threaded: a driver's <c>DiscoverAsync</c> streams on one caller; the root and its child
|
||||
/// builders share one <see cref="List{T}"/>. Not thread-safe by design.</para>
|
||||
/// </summary>
|
||||
public sealed class CapturingAddressSpaceBuilder : IAddressSpaceBuilder
|
||||
{
|
||||
private readonly List<DiscoveredNode> _nodes;
|
||||
private readonly IReadOnlyList<string> _path;
|
||||
|
||||
public CapturingAddressSpaceBuilder() : this([], []) { }
|
||||
|
||||
private CapturingAddressSpaceBuilder(List<DiscoveredNode> nodes, IReadOnlyList<string> path)
|
||||
{
|
||||
_nodes = nodes;
|
||||
_path = path;
|
||||
}
|
||||
|
||||
/// <summary>All variables captured across the whole tree (shared by the root and every child scope).</summary>
|
||||
public IReadOnlyList<DiscoveredNode> Nodes => _nodes;
|
||||
|
||||
public IAddressSpaceBuilder Folder(string browseName, string displayName)
|
||||
=> new CapturingAddressSpaceBuilder(_nodes, [.. _path, browseName]);
|
||||
|
||||
public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
|
||||
{
|
||||
_nodes.Add(new DiscoveredNode(
|
||||
FolderPathSegments: _path,
|
||||
BrowseName: browseName,
|
||||
DisplayName: displayName,
|
||||
FullReference: attributeInfo.FullName,
|
||||
DataType: attributeInfo.DriverDataType,
|
||||
IsArray: attributeInfo.IsArray,
|
||||
ArrayDim: attributeInfo.ArrayDim,
|
||||
Writable: attributeInfo.SecurityClass != SecurityClassification.ViewOnly,
|
||||
IsHistorized: attributeInfo.IsHistorized));
|
||||
return new NullHandle(attributeInfo.FullName);
|
||||
}
|
||||
|
||||
public void AddProperty(string browseName, DriverDataType dataType, object? value) { /* metadata only — ignored */ }
|
||||
|
||||
private sealed class NullHandle(string fullRef) : IVariableHandle
|
||||
{
|
||||
public string FullReference => fullRef;
|
||||
public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info) => new NullSink();
|
||||
}
|
||||
|
||||
private sealed class NullSink : IAlarmConditionSink
|
||||
{
|
||||
public void OnTransition(AlarmEventArgs args) { }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Run to verify it passes** — same filter → PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNode.cs \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/CapturingAddressSpaceBuilder.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/CapturingAddressSpaceBuilderTests.cs
|
||||
git commit -m "feat(otopcua): capturing address-space builder for driver discovery"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: `DiscoveredNodeMapper` — map discovered nodes under an equipment
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 3 (depends on Task 1 for the `DiscoveredNode` type)
|
||||
|
||||
**Files:**
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs` (the `DiscoveredFolder`/`DiscoveredVariable` materialize DTOs — placed in OpcUaServer so both the applier and the Runtime mapper can reference them; Runtime already references OpcUaServer)
|
||||
- Create: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveredNodeMapperTests.cs`
|
||||
|
||||
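**Pure function** turning `IReadOnlyList<DiscoveredNode>` + an `equipmentId` + the driver's authored-tag refs into folders + variables (NodeIds under the equipment) + routing entries. (Signature caveat: the tests below pass `authoredRefs: []`, but a C# collection expression cannot target `ISet<string>`; either declare the parameter as a type that accepts one — e.g. `IEnumerable<string>`, copied into an ordinal `HashSet<string>` inside `Map` — or pass `new HashSet<string>()` in the tests.) Rules: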
|
||||
- **Device-folder collapse:** if every node shares an identical segment at index 1 (the single device folder under the driver root), drop index 1 → `EQ/FOCAS/Identity/…` rather than `EQ/FOCAS/<deviceHost>/Identity/…`. With ≥2 devices the segments differ → not collapsed (device level retained, degrades gracefully — multi-device equipment mapping itself is a deferred follow-up).
|
||||
- **Dedup:** skip any node whose `FullReference` is in `authoredRefs` (already a Config-DB equipment tag for this driver — applies to drivers like Galaxy whose discovery refs equal the equipment-tag FullNames; for FOCAS the FixedTree refs never match authored refs, so all FixedTree nodes pass through).
|
||||
- **NodeId:** `EquipmentNodeIds.Variable(equipmentId, folderPath, name)` where `folderPath` = collapsed segments joined by `/`. Folders deduped, each parented at its prefix.
|
||||
- **DataType:** convert `DriverDataType` → the OPC-UA-builtin string `OtOpcUaNodeManager.EnsureVariable` expects. **Reuse the existing convention** — grep for how `EquipmentTagPlan.DataType` is produced from `DriverDataType` (e.g. a `DriverDataType.ToString()` / a mapping helper) and which strings `OtOpcUaNodeManager.ResolveBuiltInDataType` accepts; do NOT invent a new mapping. If a helper exists, call it; the `ToBuiltinTypeString` placeholder below (currently just `dt.ToString()`) is only a fallback, to be aligned with that convention if no helper exists.
|
||||
- **Writable:** from `DiscoveredNode.Writable` (FixedTree is read-only).
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Drivers;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class DiscoveredNodeMapperTests
|
||||
{
|
||||
private static DiscoveredNode Node(string[] path, string name, string fullRef,
|
||||
DriverDataType dt = DriverDataType.Float64, bool writable = false)
|
||||
=> new(path, name, name, fullRef, dt, false, null, writable, false);
|
||||
|
||||
[Fact]
|
||||
public void Maps_under_equipment_collapsing_single_device_folder()
|
||||
{
|
||||
var nodes = new[]
|
||||
{
|
||||
Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
|
||||
Node(["FOCAS", "10.0.0.5:8193", "Axes", "X"], "AbsolutePosition", "10.0.0.5:8193/Axes/X/AbsolutePosition"),
|
||||
};
|
||||
|
||||
var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: []);
|
||||
|
||||
result.Variables.Select(v => v.NodeId).ShouldBe(new[]
|
||||
{
|
||||
"EQ-1/FOCAS/Identity/SeriesNumber",
|
||||
"EQ-1/FOCAS/Axes/X/AbsolutePosition",
|
||||
}, ignoreOrder: true);
|
||||
// folders: EQ-1/FOCAS, EQ-1/FOCAS/Identity, EQ-1/FOCAS/Axes, EQ-1/FOCAS/Axes/X
|
||||
result.Folders.Select(f => f.NodeId).ShouldContain("EQ-1/FOCAS/Axes/X");
|
||||
result.Folders.First(f => f.NodeId == "EQ-1/FOCAS/Axes/X").ParentNodeId.ShouldBe("EQ-1/FOCAS/Axes");
|
||||
// routing: driverRef → nodeId
|
||||
result.RoutingByRef["10.0.0.5:8193/Identity/SeriesNumber"].ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
|
||||
result.Variables.First(v => v.NodeId.EndsWith("SeriesNumber")).Writable.ShouldBeFalse();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Dedups_authored_refs()
|
||||
{
|
||||
var nodes = new[]
|
||||
{
|
||||
Node(["FOCAS", "10.0.0.5:8193"], "parts-count", "parts-count"), // authored
|
||||
Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "10.0.0.5:8193/Identity/SeriesNumber", DriverDataType.String),
|
||||
};
|
||||
var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: new HashSet<string> { "parts-count" });
|
||||
result.Variables.ShouldHaveSingleItem();
|
||||
result.Variables[0].NodeId.ShouldBe("EQ-1/FOCAS/Identity/SeriesNumber");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Does_not_collapse_when_two_devices_present()
|
||||
{
|
||||
var nodes = new[]
|
||||
{
|
||||
Node(["FOCAS", "10.0.0.5:8193", "Identity"], "SeriesNumber", "a", DriverDataType.String),
|
||||
Node(["FOCAS", "10.0.0.6:8193", "Identity"], "SeriesNumber", "b", DriverDataType.String),
|
||||
};
|
||||
var result = DiscoveredNodeMapper.Map("EQ-1", nodes, authoredRefs: []);
|
||||
result.Variables.Select(v => v.NodeId).ShouldBe(new[]
|
||||
{
|
||||
"EQ-1/FOCAS/10.0.0.5:8193/Identity/SeriesNumber",
|
||||
"EQ-1/FOCAS/10.0.0.6:8193/Identity/SeriesNumber",
|
||||
}, ignoreOrder: true);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails.**
|
||||
|
||||
**Step 3: Implement `DiscoveredInjection.cs` (DTOs)**
|
||||
|
||||
```csharp
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
/// <summary>A folder to ensure during discovered-node injection (NodeId + parent + display).</summary>
|
||||
public sealed record DiscoveredFolder(string NodeId, string? ParentNodeId, string DisplayName);
|
||||
|
||||
/// <summary>A read-or-write variable to ensure during discovered-node injection.</summary>
|
||||
public sealed record DiscoveredVariable(
|
||||
string NodeId, string ParentNodeId, string DisplayName, string DataType, bool Writable, bool IsArray, uint? ArrayLength);
|
||||
```
|
||||
|
||||
**Step 3b: Implement `DiscoveredNodeMapper.cs`**
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
|
||||
/// <summary>The mapped result of grafting discovered nodes under an equipment.</summary>
|
||||
public sealed record DiscoveredInjectionPlan(
|
||||
IReadOnlyList<DiscoveredFolder> Folders,
|
||||
IReadOnlyList<DiscoveredVariable> Variables,
|
||||
IReadOnlyDictionary<string, string> RoutingByRef); // driver FullReference → equipment NodeId
|
||||
|
||||
/// <summary>
|
||||
/// Pure mapper: re-roots a driver's captured discovery tree under an equipment node, deduping
|
||||
/// authored Config-DB refs and collapsing the single device-host folder. See the design doc.
|
||||
/// </summary>
|
||||
public static class DiscoveredNodeMapper
|
||||
{
|
||||
public static DiscoveredInjectionPlan Map(
|
||||
string equipmentId, IReadOnlyList<DiscoveredNode> nodes, ISet<string> authoredRefs)
|
||||
{
|
||||
var kept = nodes.Where(n => !authoredRefs.Contains(n.FullReference)).ToList();
|
||||
|
||||
// Collapse a single shared device-folder level (index 1 under the driver root) when present.
|
||||
var collapseIndex1 = kept.Count > 0
|
||||
&& kept.All(n => n.FolderPathSegments.Count >= 2)
|
||||
&& kept.Select(n => n.FolderPathSegments[1]).Distinct(StringComparer.Ordinal).Count() == 1;
|
||||
|
||||
static IReadOnlyList<string> Effective(IReadOnlyList<string> segs, bool collapse)
|
||||
=> collapse ? [segs[0], .. segs.Skip(2)] : segs;
|
||||
|
||||
var folders = new Dictionary<string, DiscoveredFolder>(StringComparer.Ordinal);
|
||||
var variables = new List<DiscoveredVariable>();
|
||||
var routing = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
|
||||
foreach (var n in kept)
|
||||
{
|
||||
var segs = Effective(n.FolderPathSegments, collapseIndex1);
|
||||
|
||||
// Ensure every prefix folder EQ/seg0, EQ/seg0/seg1, …
|
||||
for (var i = 0; i < segs.Count; i++)
|
||||
{
|
||||
var folderPath = string.Join('/', segs.Take(i + 1));
|
||||
var nodeId = EquipmentNodeIds.SubFolder(equipmentId, folderPath);
|
||||
if (folders.ContainsKey(nodeId)) continue;
|
||||
var parent = i == 0 ? equipmentId : EquipmentNodeIds.SubFolder(equipmentId, string.Join('/', segs.Take(i)));
|
||||
folders[nodeId] = new DiscoveredFolder(nodeId, parent, segs[i]);
|
||||
}
|
||||
|
||||
var varFolderPath = string.Join('/', segs);
|
||||
var varNodeId = EquipmentNodeIds.Variable(equipmentId, varFolderPath, n.BrowseName);
|
||||
var varParent = EquipmentNodeIds.SubFolder(equipmentId, varFolderPath);
|
||||
variables.Add(new DiscoveredVariable(
|
||||
varNodeId, varParent, n.DisplayName, ToBuiltinTypeString(n.DataType), n.Writable, n.IsArray, n.ArrayDim));
|
||||
routing[n.FullReference] = varNodeId;
|
||||
}
|
||||
|
||||
return new DiscoveredInjectionPlan(folders.Values.ToList(), variables, routing);
|
||||
}
|
||||
|
||||
// Align with the existing DriverDataType → builtin-string convention used by EquipmentTagPlan /
|
||||
// OtOpcUaNodeManager.ResolveBuiltInDataType. VERIFY against that during implementation.
|
||||
private static string ToBuiltinTypeString(DriverDataType dt) => dt.ToString();
|
||||
}
|
||||
```
|
||||
|
||||
> **Implementation note:** before finalizing `ToBuiltinTypeString`, grep how `EquipmentTagPlan.DataType` is produced from a `DriverDataType` and what strings `OtOpcUaNodeManager.ResolveBuiltInDataType` accepts (e.g. `"Float64"`, `"String"`, `"Int32"`). If `DriverDataType.ToString()` already matches, keep it; otherwise mirror the existing mapping helper. The mapper test asserts NodeIds/structure, not the exact type string — add a focused assertion once the convention is confirmed.
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/DiscoveredInjection.cs \
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveredNodeMapperTests.cs
|
||||
git commit -m "feat(otopcua): map discovered nodes under an equipment subfolder"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Node-manager `RaiseNodesAddedModelChange()`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** Task 1, Task 2
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs` (add a public method near `BuildNodeShapeChangedEvent:1525`)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs` (model on `NodeManagerSurgicalShapeUpdateTests.cs`)
|
||||
|
||||
Emit a Part 3 `GeneralModelChangeEvent` with verb `NodeAdded` so already-connected clients can refresh their browse after a runtime add. Mirror the existing `BuildNodeShapeChangedEvent` (verb `DataTypeChanged`) + its `Server.ReportEvent` seam, but build a `NodeAdded` change referencing the equipment subfolder root that gained children.
|
||||
|
||||
**Step 1: Write the failing test** — instantiate the node manager as the surgical-shape test does, `EnsureFolder` + `EnsureVariable` a couple of nodes, call `RaiseNodesAddedModelChange(parentNodeId)`, and assert it does not throw and (where the harness exposes reported events, as the surgical test does) that a `GeneralModelChangeEvent` with verb `NodeAdded` was reported. Reuse the surgical test's harness/setup verbatim.
|
||||
|
||||
**Step 2: Run to verify it fails** (method missing).
|
||||
|
||||
**Step 3: Implement** — add:
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Announce that nodes were added at runtime (discovered-node injection) so subscribed clients can
|
||||
/// refresh their browse. Part 3 §8.7.4: a GeneralModelChangeEvent is emitted by the Server object;
|
||||
/// verb = NodeAdded, affected = the subfolder root that gained children. Mirrors
|
||||
/// <see cref="BuildNodeShapeChangedEvent"/>'s ReportEvent seam; tolerant if auditing/eventing is off.
|
||||
/// </summary>
|
||||
/// <param name="affectedNodeId">The equipment/subfolder NodeId string under which nodes were added.</param>
|
||||
public void RaiseNodesAddedModelChange(string affectedNodeId)
|
||||
{
|
||||
GeneralModelChangeEventState e;
|
||||
lock (Lock)
|
||||
{
|
||||
// BUILD the event under Lock (consistent snapshot of _folders/_variables), mirroring
|
||||
// BuildNodeShapeChangedEvent: EventId, SourceNode = ObjectIds.Server, SourceName, Time,
|
||||
// Severity, a ModelChangeStructureDataType with Affected = new NodeId(affectedNodeId,
|
||||
// NamespaceIndex) + Verb = (byte)ModelChangeStructureVerbMask.NodeAdded, ClearChangeMasks.
|
||||
e = BuildNodesAddedModelChange(affectedNodeId);
|
||||
}
|
||||
// REPORT OUTSIDE Lock — Server.ReportEvent re-enters the server's own subscription/event path;
|
||||
// holding Lock across it risks a lock-order inversion (mirror ReportNodeShapeChangedEvent, NOT
|
||||
// ReportConditionEvent which uses alarm.ReportEvent). Tolerant: eventing off / no monitored items.
|
||||
try { Server.ReportEvent(SystemContext, e); }
|
||||
catch (Exception ex)
|
||||
{
|
||||
#pragma warning disable CS0618
|
||||
Utils.LogError(ex, "OtOpcUaNodeManager: failed to report GeneralModelChangeEvent(NodeAdded) for {0}", affectedNodeId);
|
||||
#pragma warning restore CS0618
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> ⚠️ **Lock discipline (corrected 2026-06-26):** BUILD the `GeneralModelChangeEventState` under `lock (Lock)` (copy the field-population block from `BuildNodeShapeChangedEvent` `:1525`, changing only `Verb` → `NodeAdded` and `Affected`), but **REPORT `Server.ReportEvent` OUTSIDE the lock** — exactly like `ReportNodeShapeChangedEvent` / `RevertOptimisticWriteIfNeeded`. `Server.ReportEvent` re-enters the SDK subscription/event path; holding `Lock` across it risks a lock-order-inversion deadlock with a client that has event subscriptions. (An earlier draft of this plan said "keep it inside `lock (Lock)`" — that was wrong for `Server.ReportEvent`; `ReportConditionEvent` is *not* a valid analogue since it uses `alarm.ReportEvent`, the node's own notifier chain.)
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerModelChangeOnAddTests.cs
|
||||
git commit -m "feat(otopcua): GeneralModelChangeEvent(NodeAdded) for runtime node adds"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 4: `AddressSpaceApplier.MaterialiseDiscoveredNodes(...)`
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (depends on Tasks 2, 3)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs` (add after `MaterialiseEquipmentTags:304`)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/AddressSpaceApplierTests.cs` (add cases)
|
||||
|
||||
Add an idempotent pass that ensures the mapped folders then variables via the existing `SafeEnsureFolder`/`SafeEnsureVariable`, then raises the model-change. Folders MUST be ensured parent-before-child (sort by NodeId depth / segment count).
|
||||
|
||||
**Step 1: Write the failing test** — using the applier test's existing fake sink, call `MaterialiseDiscoveredNodes` with 2 folders + 2 read-only variables and assert the sink received `EnsureFolder`/`EnsureVariable` with the right NodeIds/parents, `writable: false`, and that a re-apply is a no-op (idempotent — sink early-returns on existing). Assert `RaiseNodesAddedModelChange` is invoked (extend the fake sink/node-manager double to record it, mirroring how the existing test verifies materialize calls).
|
||||
|
||||
**Step 2: Run to verify it fails.**
|
||||
|
||||
**Step 3: Implement**
|
||||
|
||||
```csharp
|
||||
/// <summary>
|
||||
/// Materialise driver-discovered nodes (FixedTree) under an equipment at runtime. Idempotent:
|
||||
/// re-applies are cheap (the sink's EnsureFolder/EnsureVariable early-return on existing nodes), so
|
||||
/// this is safely re-run after every address-space rebuild. Folders are ensured parent-first.
|
||||
/// Emits a NodeAdded model-change so connected clients can refresh.
|
||||
/// </summary>
|
||||
public void MaterialiseDiscoveredNodes(
|
||||
string equipmentRootNodeId,
|
||||
IReadOnlyList<DiscoveredFolder> folders,
|
||||
IReadOnlyList<DiscoveredVariable> variables)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(folders);
|
||||
ArgumentNullException.ThrowIfNull(variables);
|
||||
if (folders.Count == 0 && variables.Count == 0) return;
|
||||
|
||||
foreach (var f in folders.OrderBy(f => f.NodeId.Count(c => c == '/')))
|
||||
SafeEnsureFolder(f.NodeId, f.ParentNodeId, f.DisplayName);
|
||||
|
||||
foreach (var v in variables)
|
||||
SafeEnsureVariable(v.NodeId, v.ParentNodeId, v.DisplayName, v.DataType, v.Writable,
|
||||
historianTagname: null, isArray: v.IsArray, arrayLength: v.ArrayLength);
|
||||
|
||||
_sink.RaiseNodesAddedModelChange(equipmentRootNodeId);
|
||||
|
||||
_logger.LogInformation(
|
||||
"AddressSpaceApplier: discovered nodes materialised under {Equipment} (folders={Folders}, vars={Vars})",
|
||||
equipmentRootNodeId, folders.Count, variables.Count);
|
||||
}
|
||||
```
|
||||
|
||||
> Confirm `_sink`'s interface exposes `RaiseNodesAddedModelChange` (the sink type wraps `OtOpcUaNodeManager`); add it to the sink interface if the applier talks to an `IAddressSpaceSink` abstraction rather than the concrete manager.
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceApplier.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/AddressSpaceApplierTests.cs
|
||||
git commit -m "feat(otopcua): applier pass to materialise discovered nodes idempotently"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: `OpcUaPublishActor.MaterialiseDiscoveredNodes` message + handler
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (depends on Task 4)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs` (message record near the other records; `Receive<…>` at the block `:217`; handler near `HandleRebuild`)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs` (add a case)
|
||||
|
||||
**Step 1: Write the failing test** — with the publish-actor test harness (fake applier), send `MaterialiseDiscoveredNodes(equipmentRoot, folders, variables)` and assert the handler forwards to `_applier.MaterialiseDiscoveredNodes(...)` with the same payload.
|
||||
|
||||
**Step 2: Run to verify it fails.**
|
||||
|
||||
**Step 3: Implement** — add the message + Receive + handler:
|
||||
|
||||
```csharp
|
||||
/// <summary>Inject driver-discovered nodes (FixedTree) under an equipment at runtime (post-connect).</summary>
|
||||
public sealed record MaterialiseDiscoveredNodes(
|
||||
string EquipmentRootNodeId,
|
||||
IReadOnlyList<DiscoveredFolder> Folders,
|
||||
IReadOnlyList<DiscoveredVariable> Variables);
|
||||
```
|
||||
|
||||
In the Receive block (`:217`, alongside `Receive<RebuildAddressSpace>(HandleRebuild)`):
|
||||
|
||||
```csharp
|
||||
Receive<MaterialiseDiscoveredNodes>(HandleMaterialiseDiscovered);
|
||||
```
|
||||
|
||||
Handler:
|
||||
|
||||
```csharp
|
||||
private void HandleMaterialiseDiscovered(MaterialiseDiscoveredNodes msg)
|
||||
=> _applier.MaterialiseDiscoveredNodes(msg.EquipmentRootNodeId, msg.Folders, msg.Variables);
|
||||
```
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs
|
||||
git commit -m "feat(otopcua): OpcUaPublishActor handles discovered-node materialisation"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: `DriverInstanceActor` post-connect bounded re-discovery
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Task 1; touches actor lifecycle)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (message records area `:60-160`; `Connected()` entry via `InitializeSucceeded:278`; new private async discovery method + a self-scheduled retry tick)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
|
||||
|
||||
On reaching `Connected`, if `_driver is ITagDiscovery`, run discovery into a `CapturingAddressSpaceBuilder`, and `Context.Parent.Tell(new DiscoveredNodesReady(_driverInstanceId, nodes))`. Because FOCAS suppresses FixedTree until `FixedTreeCache` populates (~0–2 s), schedule a bounded retry: re-run every ~2 s up to a cap (~30 s / ~15 attempts) **or until the node count stops growing** (whichever first), then stop. `DiscoverAsync` reads in-memory cache → cheap. Reset/cancel the schedule on leaving `Connected` (DisconnectObserved/ForceReconnect) and re-arm on the next `Connected` entry. Use Akka scheduling (`Context.System.Scheduler.ScheduleTellOnce` self-tell of an internal `RediscoverTick`, tracked by an `ICancelable` so it's cancelled on state exit) — do NOT block the actor thread.
|
||||
|
||||
**Message records to add** (near the other nested records):
|
||||
|
||||
```csharp
|
||||
/// <summary>Published to the parent (DriverHostActor) after a post-connect discovery pass.</summary>
|
||||
public sealed record DiscoveredNodesReady(string DriverInstanceId, IReadOnlyList<DiscoveredNode> Nodes);
|
||||
|
||||
/// <summary>Internal self-tick driving bounded post-connect re-discovery.</summary>
|
||||
private sealed record RediscoverTick(int Generation, int Attempt, int LastCount);
|
||||
```
|
||||
|
||||
**Step 1: Write the failing test** — drive a `DriverInstanceActor` with a fake `IDriver` that also implements `ITagDiscovery`, whose `DiscoverAsync` yields 0 nodes on the first ~2 attempts then a non-empty set (simulating FixedTreeCache populating). Bring the actor to `Connected` (send the same init messages the existing `DriverInstanceActorTests` use). Use the TestKit parent probe (`Context.Parent` → the TestKit `TestActor` via `ActorOf` under the testkit, or the existing harness's parent-probe pattern in `DriverInstanceActorTests`) and `ExpectMsg<DiscoveredNodesReady>` — assert the eventually-delivered message carries the non-empty set, and that re-ticks stop after the set stabilises (no infinite stream). Use the TestKit scheduler / `Within` to advance.
|
||||
|
||||
**Step 2: Run to verify it fails.**
|
||||
|
||||
**Step 3: Implement** — add the discovery kick at the `InitializeSucceeded` Connected transition (after `ResubscribeDesired()`), a `ReceiveAsync<RediscoverTick>` in `Connected()`, and a `RunDiscoveryAsync` that:
|
||||
- guards `_driver is ITagDiscovery disc` (else no-op),
|
||||
- builds a `CapturingAddressSpaceBuilder`, awaits `disc.DiscoverAsync(builder, ct)`,
|
||||
- `Context.Parent.Tell(new DiscoveredNodesReady(_driverInstanceId, builder.Nodes))`,
|
||||
- if `attempt < cap` and `builder.Nodes.Count` still growing (or zero), schedules the next `RediscoverTick(_initGeneration, attempt+1, builder.Nodes.Count)` via `ICancelable` (store in a field, cancel on `DetachSubscription`/state exit).
|
||||
- Tag ticks with `_initGeneration` and ignore stale-generation ticks (mirrors the existing `InitializeSucceeded.Generation` guard) so a reconnect cancels the prior loop.
|
||||
|
||||
> Use `ReceiveAsync<RediscoverTick>` (like the other async receives in `Connected()`), and wrap the discovery call in try/catch → log Info + reschedule (bounded). Mirror the existing cancelable-scheduling pattern already used in the actor (grep `Scheduler`/`ICancelable` in this file and `DriverHostActor`).
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs
|
||||
git commit -m "feat(otopcua): driver-instance post-connect bounded re-discovery"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: `DriverHostActor` — inject discovered nodes (handler + routing + subscribe)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Tasks 2, 5, 6; touches actor + routing map)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (fields near `_nodeIdByDriverRef`; `Receive<…>` in BOTH receive states `:482` and `:512`; new handler; store `_lastComposition` in `PushDesiredSubscriptions`)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
|
||||
|
||||
Add `Receive<DriverInstanceActor.DiscoveredNodesReady>(HandleDiscoveredNodes)` to the two states that already handle `AttributeValuePublished` (`:484`, `:512`). New fields: `_lastComposition` (set at the end of `PushDesiredSubscriptions`) and `_discoveredByDriver` (`Dictionary<string, DiscoveredInjectionPlan>`). The handler:
|
||||
|
||||
1. If `_lastComposition` is null → cache nothing, log at Debug, and return (the composition has not been applied yet; a later `DiscoveredNodesReady` retry will land after it is applied).
|
||||
2. Resolve the equipment: `_lastComposition.EquipmentNodes.Where(e => e.DriverInstanceId == id)`. 0 → log Info skip; >1 → log Warning skip (multi-device deferred). Else take its `EquipmentId`.
|
||||
3. Compute `authoredRefs` = `_lastComposition.EquipmentTags.Where(t => t.DriverInstanceId == id).Select(t => t.FullName)` set.
|
||||
4. `var plan = DiscoveredNodeMapper.Map(equipmentId, msg.Nodes, authoredRefs);`
|
||||
5. If `plan.Variables` is empty → return (nothing new yet).
|
||||
6. `_discoveredByDriver[id] = plan;`
|
||||
7. For each `(ref, nodeId)` in `plan.RoutingByRef`: add to `_nodeIdByDriverRef[(id, ref)]` (the same `HashSet` fan-out structure used in `PushDesiredSubscriptions:1019`).
|
||||
8. `_opcUaPublishActor.Tell(new OpcUaPublishActor.MaterialiseDiscoveredNodes(equipmentId, plan.Folders, plan.Variables));`
|
||||
9. Merge the discovered refs into the driver's desired set and re-push: `child.Actor.Tell(new DriverInstanceActor.SetDesiredSubscriptions(union, SubscriptionPublishingInterval, alarmRefs))` where `union` = authored refs already pushed for that driver **plus** `plan.RoutingByRef.Keys`. (Keep the alarmRefs as last pushed.) The actor's `Connected` `SetDesiredSubscriptions` handler immediately re-subscribes (`:340-353`).
|
||||
|
||||
**Step 1: Write the failing test** — build a `DriverHostActor` via its existing test harness (`DriverHostActorTests`/`...WriteRoutingTests` show construction with fakes: a fake child/registry, fake OPC publish probe, a composition artifact). Apply a deployment whose composition has one equipment (`EQ-1`, `DriverInstanceId=d1`) + one authored tag, so `_lastComposition` is set and a child `d1` exists. Send `DriverInstanceActor.DiscoveredNodesReady("d1", <fixedtree nodes>)`. Assert: (a) the OPC publish probe received `MaterialiseDiscoveredNodes` with the mapped folders/vars; (b) the child probe received a `SetDesiredSubscriptions` whose refs include both the authored ref and the FixedTree refs; (c) a subsequent `AttributeValuePublished(d1, <fixedtree ref>, value)` routes to an `AttributeValueUpdate` at the mapped NodeId (proves `_nodeIdByDriverRef` updated).
|
||||
|
||||
**Step 2: Run to verify it fails.**
|
||||
|
||||
**Step 3: Implement** per the steps above. Store `_lastComposition = composition;` at the end of `PushDesiredSubscriptions` (after the existing logic). Reuse the exact fan-out add pattern for `_nodeIdByDriverRef` from `:1019-1045`.
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs
|
||||
git commit -m "feat(otopcua): inject discovered nodes into the equipment projection on connect"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 8: `DriverHostActor` — re-inject discovered nodes after a rebuild
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (depends on Task 7; same file)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (tail of `PushDesiredSubscriptions`)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs` (add a case)
|
||||
|
||||
A structural redeploy triggers `RebuildAddressSpace` (full teardown) and `PushDesiredSubscriptions` rebuilds `_nodeIdByDriverRef` from authored tags only — losing the injected FixedTree nodes + mappings. After the existing `PushDesiredSubscriptions` work, **re-apply the cached `_discoveredByDriver`**: for each cached plan, re-add its `RoutingByRef` to `_nodeIdByDriverRef`, re-`Tell` `MaterialiseDiscoveredNodes`, and re-merge its refs into that driver's pushed `SetDesiredSubscriptions`.
|
||||
|
||||
**Step 1: Write the failing test** — after Task 7's injection, simulate a second `PushDesiredSubscriptions` (re-apply the same deployment). Assert the OPC publish probe receives `MaterialiseDiscoveredNodes` AGAIN and the child's re-pushed `SetDesiredSubscriptions` still includes the FixedTree refs (i.e. they weren't dropped by the rebuild).
|
||||
|
||||
**Step 2: Run to verify it fails** (today the rebuild drops them).
|
||||
|
||||
**Step 3: Implement** — extract the per-driver merge-and-materialise into a helper reused by both `HandleDiscoveredNodes` and a new `ReapplyDiscovered()` call at the tail of `PushDesiredSubscriptions` (after `_lastComposition` is set). Guard for the case where the driver no longer exists in `_children` or the equipment was removed (drop that cache entry).
|
||||
|
||||
**Step 4: Run to verify it passes.**
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs \
|
||||
tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs
|
||||
git commit -m "feat(otopcua): re-inject discovered nodes after address-space rebuild"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Integration test — discovered nodes appear + carry values + survive lifecycle
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on Tasks 7, 8)
|
||||
|
||||
**Files:**
|
||||
- Create: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveryInjectionEndToEndTests.cs`
|
||||
- (Reuse / extend any existing in-memory `IDriver` test double in the Runtime tests; create a `FakeDiscoverableDriver : IDriver, ITagDiscovery, ISubscribable` if none fits.)
|
||||
|
||||
A focused in-process integration test (no docker, no CNC): wire `DriverHostActor` + `OpcUaPublishActor` + a real `AddressSpaceApplier`/node manager (as the publish-actor rebuild tests do) + a fake discoverable+subscribable driver whose `DiscoverAsync` exposes a delayed FixedTree set and whose poll returns values for those refs. Assert end-to-end:
|
||||
1. After connect + the discovery delay, the node manager has variables at `EQ-…/FOCAS/…`.
|
||||
2. A poll value for a FixedTree ref surfaces as a Good `AttributeValueUpdate` at the mapped NodeId (no longer `BadWaitingForInitialData`).
|
||||
3. After a simulated rebuild (re-apply), the nodes + values persist.
|
||||
|
||||
> If a full wiring proves too heavy for one test fixture, split into (9a) host→publish materialisation reaching a real node manager, and (9b) value-route smoke — but keep both in this file. Do NOT silently drop the lifecycle assertion; if you cannot wire a real node manager here, log that limitation in the test summary and cover it in Task 10's docker-dev step instead.
|
||||
|
||||
**Step: Commit**
|
||||
|
||||
```bash
|
||||
git add tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DiscoveryInjectionEndToEndTests.cs
|
||||
git commit -m "test(otopcua): end-to-end discovered-node injection + value flow"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Build + full suite + docker-dev smoke
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (depends on all prior)
|
||||
|
||||
**Files:** none (verification only; fix wiring if the build/tests surface gaps)
|
||||
|
||||
**Steps:**
|
||||
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → 0 errors, 0 warnings.
|
||||
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"` → green.
|
||||
3. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"` → green.
|
||||
4. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"` → green (no regression).
|
||||
5. **docker-dev smoke (optional but recommended):** build the docker-dev image, boot `central-1` (fused admin+driver), confirm via logs that a connected discoverable driver injects nodes (`AddressSpaceApplier: discovered nodes materialised`) and that browse shows `EQ-…/FOCAS/…`. (Mirror the symptom-#1 docker-dev confirmation in the investigation plan.)
|
||||
6. Commit any wiring fixes with a `fix(otopcua):` message.
|
||||
|
||||
---
|
||||
|
||||
## Task 11: Docs
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (depends on Task 10)
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md` (mark symptom #2 / the FixedTree feature done, link this plan + the design doc)
|
||||
- Modify: `docs/deployments/wonder-app-vd03-makino-z-34184.md` (note FixedTree now surfaces under `EQ-…/FOCAS/…`)
|
||||
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (status → Implemented)
|
||||
|
||||
**Step: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/plans/2026-06-25-otopcua-equipment-dataplane-investigation.md \
|
||||
docs/deployments/wonder-app-vd03-makino-z-34184.md \
|
||||
docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md
|
||||
git commit -m "docs(otopcua): record FixedTree-under-Equipment injection feature"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Live deploy (post-plan, with user confirmation)
|
||||
|
||||
Following the symptom-#1 pattern (self-contained publish-overlay → wonder): after the suite is green and docker-dev confirms, **confirm with the user before deploying to the production CNC node**, then deploy and browse `EQ-3686c0272279/FOCAS/Identity/SeriesNumber` + `…/Axes/X/AbsolutePosition` (assert status Good — values may be 0 on the idle machine). Live deploy is explicitly NOT part of the build/test gate.
|
||||
|
||||
## Follow-ups (out of scope, documented)
|
||||
|
||||
- Discovered **alarms** injection; **multi-device-per-driver-instance** equipment mapping; **writable** discovered nodes.
|
||||
- Reconcile the AdminUI↔driver FOCAS config-format mismatch (series-as-number, scheme-less host) at the AdminUI source.
|
||||
- Shared `AddZbSerilog` not setting static `Serilog.Log.Logger` (latent across all 3 apps).
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: DiscoveredNode DTO + CapturingAddressSpaceBuilder", "status": "completed"},
|
||||
{"id": 2, "subject": "Task 2: DiscoveredNodeMapper + materialize DTOs", "status": "completed", "blockedBy": [1]},
|
||||
{"id": 3, "subject": "Task 3: NodeManager RaiseNodesAddedModelChange", "status": "completed"},
|
||||
{"id": 4, "subject": "Task 4: AddressSpaceApplier.MaterialiseDiscoveredNodes", "status": "completed", "blockedBy": [2, 3]},
|
||||
{"id": 5, "subject": "Task 5: OpcUaPublishActor.MaterialiseDiscoveredNodes message+handler", "status": "completed", "blockedBy": [4]},
|
||||
{"id": 6, "subject": "Task 6: DriverInstanceActor post-connect bounded re-discovery", "status": "completed", "blockedBy": [1]},
|
||||
{"id": 7, "subject": "Task 7: DriverHostActor inject discovered nodes", "status": "completed", "blockedBy": [2, 5, 6]},
|
||||
{"id": 8, "subject": "Task 8: DriverHostActor re-inject after rebuild", "status": "completed", "blockedBy": [7]},
|
||||
{"id": 9, "subject": "Task 9: End-to-end discovered-node injection test", "status": "completed", "blockedBy": [7, 8]},
|
||||
{"id": 10, "subject": "Task 10: Build + full suite + docker-dev smoke", "status": "completed", "blockedBy": [9]},
|
||||
{"id": 11, "subject": "Task 11: Docs", "status": "completed", "blockedBy": [10]}
|
||||
],
|
||||
"nativeTaskIds": {
|
||||
"1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26,
|
||||
"7": 27, "8": 28, "9": 29, "10": 30, "11": 31
|
||||
},
|
||||
"lastUpdated": "2026-06-26T00:00:00Z",
|
||||
"status": "offline-complete; live wonder validation pending",
|
||||
"branch": "feat/focas-fixedtree-equipment-injection"
|
||||
}
|
||||
@@ -0,0 +1,234 @@
|
||||
# OtOpcUa — FixedTree-injection follow-ups (design)
|
||||
|
||||
**Date:** 2026-06-26
|
||||
**Status:** ✅ Implemented (2026-06-26) — all five follow-ups (A–E) built via subagent-driven development
|
||||
(16 commits `c2c368dc`..`0074f37a` on `feat/focas-fixedtree-equipment-injection`; every task spec+code
|
||||
reviewed, high-risk tasks with serial Opus reviews). Offline suites green: Runtime.Tests 331, OpcUaServer.Tests 319,
|
||||
FOCAS 248 + AbLegacy/composer additions; `dotnet build` 0 errors, production `src/` 0 warnings (TreatWarningsAsErrors).
|
||||
No DB migration and no deployment-artifact wire-format change were needed (E is projection-only — the columns +
|
||||
`Devices` array were already serialized). Live wonder re-validation of the single-device FOCAS path is optional/user-gated
|
||||
(the base feature's live path is unchanged by these follow-ups).
|
||||
**Companion to:** [`2026-06-26-otopcua-fixedtree-equipment-injection-design.md`](2026-06-26-otopcua-fixedtree-equipment-injection-design.md)
|
||||
(the base feature — ✅ built + live-validated on `wonder-app-vd03`). This design works through the
|
||||
follow-ups that feature's review chain surfaced.
|
||||
**Branch:** continue on `feat/focas-fixedtree-equipment-injection` (stacked on `fix/focas-poll-io-serialization`,
|
||||
local/unpushed — standing rule is "commit/push only when asked").
|
||||
|
||||
---
|
||||
|
||||
## Scope (locked with the user 2026-06-26)
|
||||
|
||||
The user selected **all five** items below. The base feature's explicit non-goals — discovered-**alarm**
|
||||
injection and **writable** discovered nodes — remain out of scope (locked design decisions, untouched).
|
||||
|
||||
| # | Follow-up | Size | Notes |
|
||||
|---|---|---|---|
|
||||
| A | Hardcoded 30 s discovery timeout → injectable | trivial | behavior-preserving |
|
||||
| B | Re-discovery opt-in/policy gate per driver | moderate | back-compat default |
|
||||
| C | Config-unchanged driver→equipment rebind re-triggers discovery | moderate | reverses a deliberate `won't-fix` |
|
||||
| D | De-dup the double `SetDesiredSubscriptions` during redeploy | small | one extra unsub/resub blip today |
|
||||
| E | Lift the ≥1-authored-tag requirement + multi-device-per-driver | largest | **projection-only, no DB migration** |
|
||||
|
||||
## Key discovery that shapes E
|
||||
|
||||
The Config-DB **already** models the equipment→driver(→device) association as first-class data — no schema
|
||||
change is needed for E:
|
||||
|
||||
- `Equipment.DriverInstanceId` (`string?`, made nullable by migration `20260608104706_NullableEquipmentDriverInstanceId`)
|
||||
- `Equipment.DeviceId` (`string?`) — FK to a multi-device driver's device
|
||||
- `Device` is a first-class entity (`DeviceId`, `DriverInstanceId`, schemaless `DeviceConfig` JSON with host)
|
||||
- The AdminUI equipment editor already exposes an optional driver pick (`EquipmentInput.DriverInstanceId`)
|
||||
|
||||
The **only** gap is the runtime projection: `EquipmentNode` is `(EquipmentId, DisplayName, UnsLineId)` and drops
|
||||
`DriverInstanceId`/`DeviceId`, so the injector (`DriverHostActor.HandleDiscoveredNodes`) can only resolve the
|
||||
equipment by inferring it from authored `EquipmentTags` — hence the ≥1-tag requirement. E closes that gap in the
|
||||
projection + resolver, not the schema.
|
||||
|
||||
---
|
||||
|
||||
## A. Discovery timeout → injectable
|
||||
|
||||
`DriverInstanceActor.HandleRediscoverAsync` hardcodes `new CancellationTokenSource(TimeSpan.FromSeconds(30))`
|
||||
while the rediscover interval + attempt-cap are already constructor parameters. Add a
|
||||
`rediscoverDiscoverTimeout` (`TimeSpan`, default `TimeSpan.FromSeconds(30)`) to the ctor and the `Props`
|
||||
factory; use the field instead of the literal. Pure consistency fix; default preserves behavior.
|
||||
|
||||
## B. Re-discovery opt-in / policy gate
|
||||
|
||||
**Problem:** `StartDiscovery()` runs the bounded retry loop for **every** `ITagDiscovery` driver on every
|
||||
(re)connect. FOCAS needs it (its `FixedTreeCache` fills ~0–2 s *after* connect, so a single early pass would
|
||||
capture an empty/partial tree). A driver that browses its full shape **synchronously** inside `DiscoverAsync`
|
||||
(OpcUaClient, TwinCAT, AbCip, AbLegacy) needs at most **one** pass — the 15 × 2 s retry loop is wasted (and potentially heavy) network I/O.
|
||||
|
||||
**Decision:** a per-driver **policy**, declared in code (driver "heaviness" is a property of the driver *type*,
|
||||
needs no DB/AdminUI plumbing, lowest risk). Add a default-implemented member to `ITagDiscovery`:
|
||||
|
||||
```csharp
|
||||
public enum DiscoveryRediscoverPolicy { UntilStable, Once, Never }
|
||||
|
||||
public interface ITagDiscovery
|
||||
{
|
||||
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable; // default = today's behavior
|
||||
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
|
||||
}
|
||||
```
|
||||
|
||||
- **`UntilStable`** (default, unchanged) — today's loop: retry every `_rediscoverInterval` up to
|
||||
`_rediscoverMaxAttempts` or until the captured signature is non-empty and stable.
|
||||
- **`Once`** — kick exactly one discovery pass on connect, emit one `DiscoveredNodesReady`, then stop.
|
||||
- **`Never`** — no post-connect discovery kick at all.
|
||||
|
||||
**Driver assignments:**
|
||||
- **FOCAS** → `UntilStable` (explicit; it genuinely needs the retry).
|
||||
- **OpcUaClient, TwinCAT, AbCip, AbLegacy** → `Once` (they discover synchronously in `DiscoverAsync`; one pass
|
||||
on connect injects their tree, the retry loop only added cost). Any driver not overriding the default keeps
|
||||
`UntilStable`, so this is a strict no-regression change.
|
||||
|
||||
**Mechanism:** `DriverInstanceActor.StartDiscovery` reads `((ITagDiscovery)_driver).RediscoverPolicy`. `Never`
|
||||
→ return without scheduling. `Once`/`UntilStable` → schedule the first `RediscoverTick`; `HandleRediscoverAsync`
|
||||
stops after the first pass when the policy is `Once` (instead of evaluating stop-on-stable).
|
||||
|
||||
**Alternative considered (rejected):** a per-instance JSON flag parsed by the host + AdminUI. More flexible but
|
||||
adds artifact/AdminUI plumbing for a knob whose correct value is type-uniform.
|
||||
|
||||
## C. Config-unchanged rebind re-triggers discovery
|
||||
|
||||
**Problem:** when a redeploy rebinds a driver to a new equipment **without** a `DriverConfig` change,
|
||||
`PushDesiredSubscriptions`' re-inject tail correctly **drops** the stale cached plan (a stale `EQ-1`-scoped graft
|
||||
under `EQ-2` would be worse), but `ReconcileDrivers` only restarts a child on a `DriverConfig` change — so a
|
||||
config-unchanged child is never reconnected and the FixedTree stays absent under the new equipment until the
|
||||
driver's next natural reconnect/restart.
|
||||
|
||||
The base feature deliberately did **not** add a re-trigger here, to avoid coupling the subscription pass to
|
||||
driver-lifecycle control. This follow-up reverses that — but cleanly, because the trigger is a **discovery**
|
||||
action, not lifecycle control (no stop/restart), and it is idempotent.
|
||||
|
||||
**Decision:** add a `DriverInstanceActor.TriggerRediscovery` message. In the re-inject tail, the two branches
|
||||
that `Remove` a cached plan because of a rebind/loss also `Tell` that driver's child `TriggerRediscovery`. The
|
||||
child kicks a fresh `RediscoverTick` (current `_initGeneration`) **iff it is in the `Connected` state**;
|
||||
otherwise it no-ops (its eventual reconnect re-discovers anyway). The discovery pass re-emits
|
||||
`DiscoveredNodesReady`, which resolves against the **new** composition (`_lastComposition`) and grafts under the
|
||||
new equipment. The re-trigger honors B's policy (`Never` drivers do not re-discover; `Once`/`UntilStable` run
|
||||
their normal pass(es)).
|
||||
|
||||
Update the inline comment at the drop site and the follow-up note in the base design doc to record the new
|
||||
behavior.
|
||||
|
||||
## D. De-dup the double `SetDesiredSubscriptions`
|
||||
|
||||
**Problem:** during an in-process redeploy, a cached driver receives two `SetDesiredSubscriptions`:
|
||||
the bulk authored-only send in `PushDesiredSubscriptions`, then the authored∪discovered union from
|
||||
`ApplyDiscoveredPlan` (the re-inject tail). The first send forces the child to unsubscribe the whole handle
|
||||
(authored tags included) then the second re-subscribes — one extra blip per cached driver per redeploy.
|
||||
|
||||
**Decision:** in the bulk loop, **skip** the send for any driver that has a `_discoveredByDriver` entry — the
|
||||
re-inject tail sends their complete union. **Critical fallback:** the re-inject tail can still *drop* a cached
|
||||
plan (rebind/loss, see C); when it does, it must send the **authored-only** set for that driver so its authored
|
||||
subscriptions are not lost. Net invariant: every driver receives exactly **one** `SetDesiredSubscriptions` per
|
||||
redeploy.
|
||||
|
||||
## E. Lift the ≥1-authored-tag requirement + multi-device
|
||||
|
||||
No DB migration — projection + resolver only.
|
||||
|
||||
**E1 — projection (`AddressSpaceComposer`).** Extend `EquipmentNode`:
|
||||
|
||||
```csharp
|
||||
public sealed record EquipmentNode(
|
||||
string EquipmentId,
|
||||
string DisplayName,
|
||||
string UnsLineId,
|
||||
string? DriverInstanceId = null, // from Equipment.DriverInstanceId
|
||||
string? DeviceId = null, // from Equipment.DeviceId
|
||||
string? DeviceHost = null); // resolved at projection time: Equipment.DeviceId -> Device.DeviceConfig host
|
||||
```
|
||||
|
||||
`DriverInstanceId`/`DeviceId` are copied straight off the `Equipment` row. `DeviceHost` is resolved by joining
|
||||
`Equipment.DeviceId → Device` and parsing the host out of that `Device`'s schemaless `DeviceConfig` JSON, so the
|
||||
resolver can match it against a discovered device-host folder without re-reading the DB. All three are nullable;
|
||||
existing single-equipment behavior is unaffected when they're null.
|
||||
|
||||
**E2 — resolver (`DriverHostActor.HandleDiscoveredNodes`, and the redeploy re-inject tail).** Replace tag-only
|
||||
equipment resolution with:
|
||||
|
||||
> **candidates** = { equipments where `EquipmentNode.DriverInstanceId == driverId` } **∪**
|
||||
> { equipments inferred from authored `EquipmentTags` for `driverId` } (keeps today's path working)
|
||||
|
||||
- **0 candidates** → log Info, skip (unchanged).
|
||||
- **1 candidate** → graft all discovered nodes under it (today's single-device behavior, now also works with
|
||||
**zero** authored tags because the equipment-level `DriverInstanceId` resolves it). The device-host folder is
|
||||
still collapsed (single device).
|
||||
- **>1 candidates (multi-device)** → partition discovered nodes by their device-host folder segment
|
||||
(`DiscoveredNode.FolderPathSegments[1]`) and graft each device's subtree under the equipment whose
|
||||
`DeviceHost` matches that segment. A device-host with no matching equipment is **warn-skipped** (its subtree is
|
||||
not grafted) rather than mis-grafted. The mapper's existing device-host collapse already disables itself when
|
||||
≥2 distinct device-host segments are present, so multi-device paths retain the device-host level and don't
|
||||
collide.
|
||||
|
||||
**⚠️ Implementation risk (E2 multi-device only):** the partition join is a **host string** match — the driver's
|
||||
emitted device-host folder segment (FOCAS uses `device.HostAddress`) must equal the equipment's projected
|
||||
`DeviceHost` (parsed from `DeviceConfig`). Both ultimately derive from the same device configuration, but the
|
||||
string forms must be normalized to match (e.g. `host:port`). The warn-skip fallback makes a mismatch *safe* (no
|
||||
mis-graft, authored tags + single-device paths unaffected); a normalization helper + a unit test pin the formats
|
||||
together. Single-device deployments (the validated FOCAS `z-34184` case) take the "1 candidate" path and are
|
||||
**not** exposed to this risk.
|
||||
|
||||
---
|
||||
|
||||
## Data flow (unchanged)
|
||||
|
||||
E changes only *which equipment* a discovered node is grafted under and *whether* an equipment with no authored
|
||||
tags participates. Once the NodeId is assigned, the materialize → subscribe → poll → push value path is exactly
|
||||
the base feature's path; B/C/D change *when/how often* discovery runs and *how many* subscription pushes occur,
|
||||
not the value path.
|
||||
|
||||
## Error handling
|
||||
|
||||
- B `Never` driver → no discovery, authored tags unaffected.
|
||||
- C re-trigger on a non-`Connected` child → no-op (safe; reconnect re-discovers).
|
||||
- D dropped-plan fallback → authored-only send, so a rebind/loss never strands a driver's authored subscriptions.
|
||||
- E multi-device unmatched device-host → Warning + skip that device's subtree; other devices + authored tags
|
||||
unaffected. >1 candidate with no `DeviceHost` data anywhere → falls back to the base feature's warn+skip
|
||||
(no regression).
|
||||
|
||||
## Testing
|
||||
|
||||
- **A:** ctor/`Props` wires the timeout; default is 30 s (assert via a short injected timeout in an existing
|
||||
rediscover test).
|
||||
- **B:** `Never` → no `DiscoveredNodesReady`; `Once` → exactly one even when the captured set would keep growing;
|
||||
`UntilStable` → today's loop (regression). FOCAS reports `UntilStable`; the four network drivers report `Once`.
|
||||
- **C:** rebind drop branch `Tell`s `TriggerRediscovery`; `Connected` child re-discovers and re-emits;
|
||||
non-`Connected` child no-ops; re-trigger respects a `Never` policy.
|
||||
- **D:** single-send invariant — a cached driver gets exactly one `SetDesiredSubscriptions` on redeploy
|
||||
(union when applied; authored-only when the plan is dropped).
|
||||
- **E1:** `EquipmentNode` projection carries `DriverInstanceId`/`DeviceId`/`DeviceHost`; `DeviceHost` resolves via
|
||||
the `Device` join + `DeviceConfig` host parse; nulls when unset.
|
||||
- **E2:** tag-less graft (driver-level link, 0 authored tags); single-candidate unchanged (collapse retained);
|
||||
multi-device partition maps each device-host to the right equipment; unmatched device-host → warn-skip;
|
||||
host-string normalization.
|
||||
- **Regression:** Runtime.Tests, OpcUaServer.Tests, and the FOCAS suite stay green; the validated single-device
|
||||
FOCAS injection path is unchanged.
|
||||
- Live wonder re-validation of the single-device FOCAS path is **optional** and user-gated (the base feature is
|
||||
already live-validated; these follow-ups don't alter that path's runtime behavior).
|
||||
|
||||
## Scope / non-goals
|
||||
|
||||
- **In:** A–E above.
|
||||
- **Out (still locked):** discovered-**alarm** injection; **writable** discovered nodes.
|
||||
|
||||
## Touched code (anticipated)
|
||||
|
||||
- `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs` — `DiscoveryRediscoverPolicy` enum + default member.
|
||||
- Driver classes (`FocasDriver`, `OpcUaClientDriver`, `TwinCATDriver`, `AbCipDriver`, `AbLegacyDriver`) — each declares its own
|
||||
`RediscoverPolicy`.
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` — injectable timeout (A); policy-gated
|
||||
`StartDiscovery`/`HandleRediscoverAsync` (B); `TriggerRediscovery` message (C).
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` — resolver union + multi-device partition (E2);
|
||||
re-trigger on rebind drop (C); bulk-send skip + dropped-plan fallback (D).
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceComposer.cs` — `EquipmentNode` projection (E1).
|
||||
- `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DiscoveredNodeMapper.cs` — multi-device partition support (E2).
|
||||
- Tests under `tests/.../Runtime.Tests` and `tests/.../OpcUaServer.Tests`.
|
||||
|
||||
## Task tracking
|
||||
|
||||
Implementation tasks are generated from this design by writing-plans; see the companion *FixedTree-injection follow-ups — Implementation Plan*.
|
||||
@@ -0,0 +1,358 @@
|
||||
# FixedTree-injection follow-ups — Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:subagent-driven-development (or executing-plans) to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Implement the five approved follow-ups to the FixedTree-under-Equipment dynamic-injection feature: (A) injectable discovery timeout, (B) per-driver re-discovery policy gate, (C) re-trigger discovery on a config-unchanged rebind, (D) de-dup the double `SetDesiredSubscriptions`, and (E) lift the ≥1-authored-tag requirement + support multi-device-per-driver.
|
||||
|
||||
**Architecture:** Akka.NET actor pipeline. `DriverInstanceActor` runs post-connect discovery and publishes `DiscoveredNodesReady`; `DriverHostActor` resolves the bound equipment, maps discovered nodes via `DiscoveredNodeMapper`, caches a plan, materialises via `OpcUaPublishActor`, and merges subscription refs. Composition is built by `AddressSpaceComposer.Compose` (pure, from entities) and mirrored by `DeploymentArtifact` (decode, from the sealed JSON artifact) — the two MUST stay byte-parity-equal. The deployment artifact already serialises full `Equipment` + `Device` entities, so E needs **no DB migration and no artifact wire-format change** — only decode/projection reads.
|
||||
|
||||
**Tech Stack:** .NET 10, C# (default interface members, collection expressions), Akka.NET, xUnit. Build: `dotnet build ZB.MOM.WW.OtOpcUa.slnx` (TreatWarningsAsErrors). Test (macOS — run filtered, NOT full-solution; the net48 Wonderware testhost can't run on macOS):
|
||||
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"`
|
||||
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"`
|
||||
- `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"`
|
||||
|
||||
**Design:** [`2026-06-26-otopcua-fixedtree-followups-design.md`](2026-06-26-otopcua-fixedtree-followups-design.md). Branch: `feat/focas-fixedtree-equipment-injection` (continue on it; commit per task; do NOT push/merge — standing rule).
|
||||
|
||||
**Out of scope (locked):** discovered-alarm injection; writable discovered nodes.
|
||||
|
||||
---
|
||||
|
||||
## Execution order & parallelism
|
||||
|
||||
Two files are each touched by multiple tasks and MUST be edited serially:
|
||||
- `DriverInstanceActor.cs`: **Task 1 → Task 3 → Task 4**
|
||||
- `DriverHostActor.cs`: **Task 6 → Task 7 → Task 8 → Task 9**
|
||||
|
||||
Independent file sets that can run concurrently with the above: **Task 2** (`ITagDiscovery` + 5 driver files) and **Task 5** (`AddressSpaceComposer.cs` + `DeploymentArtifact.cs`).
|
||||
|
||||
Dependency summary: T3 ⟵{T1,T2}; T4 ⟵T3; T6 ⟵T5; T7 ⟵{T4,T6}; T8 ⟵T7; T9 ⟵{T5,T8}; T10 ⟵T9; T11 ⟵{T2,T4,T9,T10}.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Injectable discovery timeout (follow-up A)
|
||||
|
||||
**Classification:** small
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** Task 2, Task 5
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (ctor ~244-259, `Props` ~195-210, fields ~133-137, `HandleRediscoverAsync` ~765)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** `HandleRediscoverAsync` hardcodes `using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));` (line 765). The rediscover interval + attempt-cap are already ctor params (`_rediscoverInterval`, `_rediscoverMaxAttempts`). Add a sibling param for the per-pass discovery timeout, default-preserving.
|
||||
|
||||
**Step 1 — Failing test:** add a test asserting that when constructed with a very short discovery timeout and an `ITagDiscovery` whose `DiscoverAsync` blocks, the pass cancels by the injected timeout (e.g. `DiscoveredNodesReady` carries an empty set within the short window) rather than waiting 30 s. Reuse the existing fake `ITagDiscovery` driver in this test file (search it for the existing discovery-actor fake; mirror that pattern). If a fully deterministic timeout test is too flaky, instead assert the wiring: a new public `static readonly` field `DefaultRediscoverDiscoverTimeout` exists and equals 30 s, and the ctor/`Props` accept the param.
|
||||
|
||||
**Step 2 — Verify it fails:** `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~DriverInstanceActorDiscoveryTests"` → fails to compile / fails assertion.
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- Add `public static readonly TimeSpan DefaultRediscoverDiscoverTimeout = TimeSpan.FromSeconds(30);` next to the other discovery defaults (~line 36-39).
|
||||
- Add a field `private readonly TimeSpan _rediscoverDiscoverTimeout;` (~133-137).
|
||||
- Add ctor param `TimeSpan? rediscoverDiscoverTimeout = null` (after `rediscoverMaxAttempts`); assign `_rediscoverDiscoverTimeout = rediscoverDiscoverTimeout ?? DefaultRediscoverDiscoverTimeout;`.
|
||||
- Add the matching optional param to `Props` and forward it.
|
||||
- In `HandleRediscoverAsync`, replace `TimeSpan.FromSeconds(30)` with `_rediscoverDiscoverTimeout`.
|
||||
|
||||
**Step 4 — Verify:** test passes; `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): make FixedTree re-discovery per-pass timeout injectable (follow-up A)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Re-discovery policy enum + ITagDiscovery member + driver overrides (follow-up B, part 1)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, Task 5
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Core/ZB.MOM.WW.OtOpcUa.Core.Abstractions/ITagDiscovery.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.FOCAS/FocasDriver.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.TwinCAT/TwinCATDriver.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbCip/AbCipDriver.cs`
|
||||
- Modify: `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy/AbLegacyDriver.cs`
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs` (or a small new test next to the FOCAS driver tests asserting `FocasDriver` reports `UntilStable`)
|
||||
|
||||
**Context:** `ITagDiscovery` (Core.Abstractions) currently has only `DiscoverAsync`. Add a policy the actor (Task 3) honors. Default = today's behavior so any non-overriding driver is unchanged.
|
||||
|
||||
**Step 1 — Failing test:** assert `new FocasDriver(...).RediscoverPolicy == DiscoveryRediscoverPolicy.UntilStable` and that one network driver (e.g. `OpcUaClientDriver`) reports `Once`. (Construct via the simplest available ctor/fake; if drivers are hard to construct standalone, assert the enum + default member exist and compile, plus a focused test on FOCAS.)
|
||||
|
||||
**Step 2 — Verify it fails:** compile failure (enum/member absent).
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- In `ITagDiscovery.cs`, add the enum + a default-implemented member:
|
||||
```csharp
|
||||
/// <summary>How aggressively the host re-runs post-connect discovery for this driver.</summary>
|
||||
public enum DiscoveryRediscoverPolicy
|
||||
{
|
||||
/// <summary>Retry every interval up to the cap or until the captured set is non-empty and stable
|
||||
/// (for drivers whose discovered shape fills in asynchronously after connect, e.g. FOCAS FixedTree).</summary>
|
||||
UntilStable,
|
||||
/// <summary>Run exactly one discovery pass on connect (drivers that discover synchronously in DiscoverAsync).</summary>
|
||||
Once,
|
||||
/// <summary>Never run post-connect discovery.</summary>
|
||||
Never,
|
||||
}
|
||||
|
||||
public interface ITagDiscovery
|
||||
{
|
||||
/// <summary>Post-connect re-discovery policy. Default preserves the original retry-until-stable behavior.</summary>
|
||||
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
|
||||
|
||||
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
|
||||
}
|
||||
```
|
||||
- `FocasDriver`: add `public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;` (explicit — it genuinely needs the retry loop).
|
||||
- `OpcUaClientDriver`, `TwinCATDriver`, `AbCipDriver`, `AbLegacyDriver`: add `public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;` — these discover synchronously inside `DiscoverAsync`, so one pass on connect suffices; the 15× retry was wasted (potentially heavy) work. **Before setting `Once`, confirm each driver's `DiscoverAsync` returns its complete set synchronously** (read each `DiscoverAsync`); if any populates a cache asynchronously after connect like FOCAS, leave it `UntilStable` and note why in a comment.
|
||||
|
||||
**Step 4 — Verify:** test passes; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): add ITagDiscovery.RediscoverPolicy + per-driver assignments (follow-up B)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 3: DriverInstanceActor honors RediscoverPolicy (follow-up B, part 2)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (serial after Task 1 on the same file; needs Task 2's enum)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (`StartDiscovery` ~736-740, `HandleRediscoverAsync` ~754-795)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** `StartDiscovery()` currently kicks the loop for every `ITagDiscovery` driver. `HandleRediscoverAsync` schedules the next tick unless stable/capped. Gate both on the driver's `RediscoverPolicy`.
|
||||
|
||||
**Step 1 — Failing tests (3):**
|
||||
1. A fake `ITagDiscovery` driver reporting `Never` → no `DiscoveredNodesReady` is ever published after connect.
|
||||
2. A fake reporting `Once` whose captured set would keep GROWING across passes → exactly ONE `DiscoveredNodesReady` and no further tick scheduled.
|
||||
3. A fake reporting `UntilStable` → existing behavior (retries until stable/cap) — keep/extend the current passing test.
|
||||
|
||||
**Step 2 — Verify they fail:** the `Never`/`Once` tests fail (today everything retries-until-stable).
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- In `StartDiscovery()`: after the `if (_driver is not ITagDiscovery discovery) return;` guard, read the policy; `if (discovery.RediscoverPolicy == DiscoveryRediscoverPolicy.Never) return;` before scheduling the first `RediscoverTick`.
|
||||
- In `HandleRediscoverAsync`: after publishing `DiscoveredNodesReady`, when the policy is `Once`, do NOT schedule another tick (log Debug "policy=Once, single pass" and return). When `UntilStable`, keep today's stop-on-stable + cap logic. (Read the live policy via `((ITagDiscovery)_driver).RediscoverPolicy`.)
|
||||
- Keep the generation guard intact.
|
||||
|
||||
**Step 4 — Verify:** the 3 tests pass; the full `DriverInstanceActorDiscoveryTests` + `Runtime.Tests` suite stays green; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): DriverInstanceActor honors RediscoverPolicy (Never/Once/UntilStable) (follow-up B)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 4: TriggerRediscovery message + handler (follow-up C, part 1)
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (serial after Task 3 on the same file)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs` (message decls near `RediscoverTick` ~110-115; add a `Connected`-state receive)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverInstanceActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** Task 7 (`DriverHostActor`) will `Tell` a driver child to re-run discovery after a rebind. The child must accept that message and only act when `Connected`.
|
||||
|
||||
**Step 1 — Failing tests (2):**
|
||||
1. Send `TriggerRediscovery` to an actor whose driver is `Connected` → it runs a discovery pass and publishes `DiscoveredNodesReady` (respecting policy: a `Never` driver does NOT).
|
||||
2. Send `TriggerRediscovery` before connect / while not `Connected` → no `DiscoveredNodesReady`, no crash (no-op).
|
||||
|
||||
**Step 2 — Verify they fail:** message type doesn't exist.
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- Add `public sealed record TriggerRediscovery();` near the other public messages.
|
||||
- In the `Connected` state, add a receive for `TriggerRediscovery` that calls `StartDiscovery()` (which already honors policy + the `ITagDiscovery` guard, and uses the current `_initGeneration`).
|
||||
- In other states, either don't register the receive (so it's unhandled = no-op) or register a no-op. Prefer registering only in `Connected` so a non-connected child silently ignores it (verify the actor's state-machine style — match how other state-scoped messages are handled). Ensure no `Unhandled`-logging noise; if the actor logs unhandled messages, add an explicit ignore in the relevant states.
|
||||
|
||||
**Step 4 — Verify:** both tests pass; suite green; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): DriverInstanceActor.TriggerRediscovery message (follow-up C)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 5: EquipmentNode carries DriverInstanceId/DeviceId/DeviceHost (follow-up E, projection)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** Task 1, Task 2
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/AddressSpaceComposer.cs` (`EquipmentNode` record line 61; projection ~326-332; `Compose` signatures ~281-312)
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DeploymentArtifact.cs` (`ReadEquipmentNode` ~810-820; the equipment decode call ~204; `Empty()` ~362-367; add a `Devices`-array → `DeviceId`→host map)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/` (composer projection test) and the existing artifact-decode/parity test for `EquipmentNode` (search `tests/` for `ReadEquipmentNode`/`EquipmentNodes`/`DeploymentArtifact` coverage; if a Compose-vs-decode parity test exists, extend it)
|
||||
|
||||
**Context:** The artifact already serialises full `Equipment` rows (incl. nullable `DriverInstanceId`, `DeviceId`) and a full `Devices` array (each `Device` has `DeviceId` + schemaless `DeviceConfig` JSON containing FOCAS's `HostAddress`). `Compose` (pure) and `DeploymentArtifact` (decode) MUST produce identical `EquipmentNode`s. `_lastComposition` (used by the resolver) always comes from decode, but parity is still required by tests.
|
||||
|
||||
**Step 1 — Failing tests:**
|
||||
- Composer: given an `Equipment` with `DriverInstanceId="d1"`, `DeviceId="dev1"`, and a `Device{DeviceId="dev1", DeviceConfig={"HostAddress":"10.0.0.5:8193"}}`, `Compose(...)` yields `EquipmentNode` with `DriverInstanceId=="d1"`, `DeviceId=="dev1"`, `DeviceHost=="10.0.0.5:8193"`; with no device assigned → all three null.
|
||||
- Decode: an artifact JSON whose `Equipment` element has those fields + a matching `Devices` element decodes to the same `EquipmentNode`.
|
||||
|
||||
**Step 2 — Verify they fail:** `EquipmentNode` has no such fields.
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- Extend the record (defaulted params keep all existing call sites compiling):
|
||||
```csharp
|
||||
public sealed record EquipmentNode(
|
||||
string EquipmentId,
|
||||
string DisplayName,
|
||||
string UnsLineId,
|
||||
string? DriverInstanceId = null,
|
||||
string? DeviceId = null,
|
||||
string? DeviceHost = null);
|
||||
```
|
||||
- Add a shared host-extraction helper usable by BOTH sides (place it where both can call it without a new project dependency — e.g. a `public static string? TryExtractDeviceHost(string? deviceConfigJson)` on `AddressSpaceComposer`, parsing the top-level `"HostAddress"` string from the `DeviceConfig` JSON; return null if absent/unparseable). Add a normalization step (trim; lower-case host) and DOCUMENT that the discovered device-host folder segment must be normalized the same way in Task 9.
|
||||
- `Compose`: add an optional `IReadOnlyList<Device>? devices = null` param to BOTH overloads (forward from the 5-arg overload as empty). Build `deviceHostById = (devices ?? []).ToDictionary(d => d.DeviceId, d => TryExtractDeviceHost(d.DeviceConfig))` (null-coalesce first: `devices` is optional and may be null). In the equipment projection, set `DriverInstanceId: e.DriverInstanceId`, `DeviceId: e.DeviceId`, `DeviceHost: e.DeviceId is null ? null : deviceHostById.GetValueOrDefault(e.DeviceId)`.
|
||||
- `DeploymentArtifact`: read the `Devices` array (decode `DeviceId` + `DeviceConfig`) into a `DeviceId`→host map using the SAME `TryExtractDeviceHost` helper; thread it into `ReadEquipmentNode` (change its signature to accept the map, or do a post-pass) so it reads `DriverInstanceId`/`DeviceId` from the element and resolves `DeviceHost` from the map. Update `Empty()` only if its arity changed (it won't — record params are defaulted).
|
||||
- **Parity:** ensure the decode-side host normalization is byte-identical to `Compose`'s (same helper). If a Compose-vs-decode parity test exists, pass the same `Devices` to `Compose` in that test.
|
||||
|
||||
**Step 4 — Verify:** new tests pass; `OpcUaServer.Tests` + `Runtime.Tests` green; build 0 warnings. **Run the existing artifact-parity test** — it MUST stay green.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): EquipmentNode carries DriverInstanceId/DeviceId/DeviceHost (follow-up E projection)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 6: DriverHostActor — cache-as-dict + driver-level equipment resolution (follow-up E, part 1)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (serial: first DriverHostActor task; needs Task 5)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`_discoveredByDriver` field ~168; `HandleDiscoveredNodes` ~580-639; `ApplyDiscoveredPlan` ~658-701; `RoutingEquals`; redeploy re-inject tail ~1247-1290)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** Today `_discoveredByDriver` is `Dictionary<string, DiscoveredInjectionPlan>` (one plan per driver) and equipment is resolved ONLY from authored `EquipmentTags`. This task (1) changes the cache value to a per-equipment map so Task 9 can add multiple equipments, and (2) makes resolution also use the equipment-level driver link so a driver with an assigned equipment but ZERO authored tags still grafts. **Still requires exactly one resolved equipment here** (multi-device is Task 9) — >1 keeps the current warn+skip.
|
||||
|
||||
**Step 1 — Failing tests:**
|
||||
- Tag-less graft: composition has an `EquipmentNode{DriverInstanceId="d1"}` with NO authored `EquipmentTags` for `d1`; `DiscoveredNodesReady("d1", nodes)` → nodes graft under that equipment (today: skipped with "no equipment/authored tags").
|
||||
- Regression: the existing single-equipment-with-authored-tags test still grafts identically (collapse retained).
|
||||
|
||||
**Step 2 — Verify it fails:** tag-less case is skipped today.
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- Change `_discoveredByDriver` to `Dictionary<string, IReadOnlyDictionary<string, DiscoveredInjectionPlan>>` (driverId → (equipmentId → plan)). Update ALL readers: `HandleDiscoveredNodes` short-circuit, `ApplyDiscoveredPlan`, and the redeploy re-inject tail must iterate the inner map.
|
||||
- New resolution in `HandleDiscoveredNodes`: candidate equipments =
|
||||
`_lastComposition.EquipmentNodes.Where(e => e.DriverInstanceId == driverId).Select(e => e.EquipmentId)`
|
||||
**∪** the existing authored-tag-derived set. Distinct.
|
||||
- 0 → log Info, skip (unchanged message).
|
||||
- 1 → resolve `equipmentId`; authoredRefs for that driver as today; `DiscoveredNodeMapper.Map(equipmentId, nodes, authoredRefs)`; cache as a 1-entry inner map; apply.
|
||||
- >1 → for THIS task, keep `_log.Warning(... "multi-equipment-per-driver is handled in the multi-device path")` + skip. (Task 9 replaces this branch.)
|
||||
- `ApplyDiscoveredPlan`: keep applying a single `(equipmentId, plan)`; callers now iterate the inner map and call it per entry. The subscription-merge union must include ALL discovered routing keys across the driver's plans (so a multi-plan driver subscribes every device's refs). Keep the authored value/alarm ref computation.
|
||||
- `RoutingEquals` short-circuit: compare the FULL new inner-map routing against the cached inner-map routing (skip re-apply only when every equipment's routing is unchanged).
|
||||
- Redeploy re-inject tail: iterate `_discoveredByDriver`; for each driver, re-resolve candidates from the CURRENT composition; per cached `(equipmentId, plan)` entry, keep the existing drop rules (equipment no longer resolves / plan NodeIds not scoped to `equipmentId`) but applied per-entry; re-apply surviving entries. (Task 7 will add the re-trigger on drop; Task 8 the de-dup.)
|
||||
|
||||
**Step 4 — Verify:** new + existing `DriverHostActorDiscoveryTests` green; `Runtime.Tests` green; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): driver-level equipment resolution + per-equipment discovered-plan cache (follow-up E)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 7: DriverHostActor — re-trigger discovery on rebind drop (follow-up C, part 2)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~4 min
|
||||
**Parallelizable with:** none (serial after Task 6; needs Task 4's message)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (redeploy re-inject tail drop branches ~1264-1288; update the deliberate-`won't-fix` comment)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** When the re-inject tail DROPS a cached plan because the equipment rebound/no-longer-resolves, the FixedTree stays absent under the new equipment until the driver's next natural reconnect. Re-trigger discovery so it re-grafts promptly.
|
||||
|
||||
**Step 1 — Failing test:** simulate a redeploy where a driver's equipment changed (cached plan scoped to old `EQ-1`, new composition binds the driver to `EQ-2`). Assert the driver child receives `DriverInstanceActor.TriggerRediscovery` after the drop. (Use the test harness's child-probe/TestProbe pattern already used in this file for asserting messages to driver children.)
|
||||
|
||||
**Step 2 — Verify it fails:** no re-trigger today.
|
||||
|
||||
**Step 3 — Implement:** in each drop branch (the two `Remove` sites), after removing the entry, `Tell` that driver's child actor `new DriverInstanceActor.TriggerRediscovery()` (guard: only if the child exists in `_children`). Update the inline comment: the previous "we deliberately do NOT add re-trigger logic" note becomes a description of the new re-trigger (discovery-only, idempotent, child no-ops if not `Connected`). If a driver maps to MULTIPLE cached equipment entries and only one drops, still send a single `TriggerRediscovery` (discovery re-resolves all of them) — de-dupe so a driver is told at most once per re-inject pass.
|
||||
|
||||
**Step 4 — Verify:** test passes; suite green; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): re-trigger discovery on config-unchanged rebind (follow-up C)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 8: DriverHostActor — single SetDesiredSubscriptions per redeploy (follow-up D)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (serial after Task 7)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`PushDesiredSubscriptions` bulk loop ~1204; re-inject tail interaction)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** During an in-process redeploy a cached driver gets the bulk authored-only `SetDesiredSubscriptions` (line 1204) AND then the union from `ApplyDiscoveredPlan` (line 697) — one extra unsub/resub blip. Make it exactly one send per driver.
|
||||
|
||||
**Step 1 — Failing test:** redeploy with one driver that has a cached discovered plan; assert the driver child receives `SetDesiredSubscriptions` EXACTLY ONCE during the redeploy, and that the single payload is the authored∪discovered UNION. Add a second test: a driver whose cached plan is DROPPED in the re-inject tail (rebind) still receives exactly one `SetDesiredSubscriptions` carrying the AUTHORED-ONLY set (fallback) — its authored subscriptions must not be lost.
|
||||
|
||||
**Step 2 — Verify it fails:** today the cached-driver case sends twice.
|
||||
|
||||
**Step 3 — Implement:**
|
||||
- In the bulk loop, SKIP the send for any `driverId` present in `_discoveredByDriver` (capture the key set BEFORE the re-inject tail runs).
|
||||
- Re-inject tail: when a cached plan is APPLIED, `ApplyDiscoveredPlan` already sends the union (covers authored). When a cached plan is DROPPED (all entries for the driver removed → the driver no longer has any cached plan), send the authored-only `SetDesiredSubscriptions` for that driver as a fallback (mirror the bulk-loop payload: authored value refs + alarm refs, `SubscriptionPublishingInterval`).
|
||||
- Ensure the invariant holds for drivers WITHOUT a cached plan (unchanged: single bulk send) and drivers added/removed by the reconcile.
|
||||
|
||||
**Step 4 — Verify:** both tests pass; the existing redeploy/restore tests stay green (watch for any test asserting the old double-send count); build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "perf(otopcua): one SetDesiredSubscriptions per driver per redeploy (follow-up D)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 9: DriverHostActor — multi-device-per-driver partition (follow-up E, part 2)
|
||||
|
||||
**Classification:** high-risk
|
||||
**Estimated implement time:** ~5 min
|
||||
**Parallelizable with:** none (serial after Task 8; needs Task 5's DeviceHost)
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs` (`HandleDiscoveredNodes` >1-candidate branch from Task 6)
|
||||
- Test: `tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorDiscoveryTests.cs`
|
||||
|
||||
**Context:** Replace Task 6's ">1 candidate → warn+skip" with a real partition. Each candidate equipment has `EquipmentNode.DeviceHost` (from Task 5). The discovered nodes carry a device-host folder segment at `FolderPathSegments[1]` (FOCAS uses `device.HostAddress`). Partition nodes by that segment, normalize it the SAME way Task 5 normalized `DeviceHost`, and map each device's subset under the matching equipment via the existing `DiscoveredNodeMapper.Map` (a single-device subset → collapse kicks in per equipment → clean `EQ-n/FOCAS/Identity/...`).
|
||||
|
||||
**Step 1 — Failing tests:**
|
||||
- Multi-device: driver `d1` resolves to `EQ-A{DeviceHost=h1}` and `EQ-B{DeviceHost=h2}`; discovered nodes split across folder segments `h1`/`h2`; assert `h1`'s subtree grafts under `EQ-A` and `h2`'s under `EQ-B`, each routing-keyed correctly, and `_discoveredByDriver["d1"]` has two entries.
|
||||
- Unmatched device-host → warn-skip: a discovered segment `h3` with no matching equipment is NOT grafted (logged Warning), while `h1`/`h2` still graft.
|
||||
- Degenerate: >1 candidate but NO `DeviceHost` data anywhere → falls back to warn+skip (no crash, no mis-graft).
|
||||
|
||||
**Step 2 — Verify it fails:** Task 6 left this as warn+skip.
|
||||
|
||||
**Step 3 — Implement:** in the >1-candidate branch, build `hostToEquipment = candidates.Where(e => e.DeviceHost != null).ToDictionary(e => Normalize(e.DeviceHost), e => e.EquipmentId)` (guard duplicate hosts BEFORE building the map, since `ToDictionary` throws on a duplicate key → warn+skip the ambiguous host). Partition `nodes` by `Normalize(FolderPathSegments.Count >= 2 ? FolderPathSegments[1] : null)`. For each partition with a matching equipment: compute that equipment's authoredRefs, `Map(equipmentId, partitionNodes, authoredRefs)`, collect into the inner `(equipmentId → plan)` map. Unmatched partitions → `_log.Warning` + skip. Cache the multi-entry inner map and apply every entry (Task 6 made apply per-entry). Use the SAME normalization helper from Task 5 (factor it so both call it).
|
||||
|
||||
**Step 4 — Verify:** all three tests pass; single-device + tag-less tests from Task 6 still green; `Runtime.Tests` + `OpcUaServer.Tests` + FOCAS suites green; build 0 warnings.
|
||||
|
||||
**Step 5 — Commit:** `git commit -m "feat(otopcua): multi-device-per-driver FixedTree partition (follow-up E)"`
|
||||
|
||||
---
|
||||
|
||||
### Task 10: Docs — update follow-up notes + design statuses
|
||||
|
||||
**Classification:** trivial
|
||||
**Estimated implement time:** ~3 min
|
||||
**Parallelizable with:** none (after Task 9)
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-design.md` (the "Follow-ups surfaced during the review chain" section + the decisions-table multi-device row — mark A–E DONE, note the rebind re-trigger now exists)
|
||||
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-followups-design.md` (Status → Implemented)
|
||||
- Modify: `docs/plans/2026-06-26-otopcua-fixedtree-equipment-injection-RESUME.md` (§3 — strike the now-closed follow-ups)
|
||||
|
||||
**Steps:** update the prose to reflect what shipped (each follow-up, plus the fact that E required no migration and no artifact change, and that the rebind re-trigger cleanly reverses the earlier `won't-fix` decision). Commit: `git commit -m "docs(otopcua): record FixedTree follow-ups A-E as implemented"`
|
||||
|
||||
---
|
||||
|
||||
### Task 11: Build + full offline suite + regression gate
|
||||
|
||||
**Classification:** standard
|
||||
**Estimated implement time:** ~4 min (mostly test wall-time)
|
||||
**Parallelizable with:** none (final; after Tasks 2, 4, 9, 10)
|
||||
|
||||
**Files:** none (verification only)
|
||||
|
||||
**Steps:**
|
||||
1. `dotnet build ZB.MOM.WW.OtOpcUa.slnx` → **0 errors, 0 warnings**.
|
||||
2. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~Runtime.Tests"` → all green.
|
||||
3. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~OpcUaServer.Tests"` → all green.
|
||||
4. `dotnet test ZB.MOM.WW.OtOpcUa.slnx --filter "FullyQualifiedName~FOCAS"` → all green (live-wire integration tests skip without the CNC — expected).
|
||||
5. Confirm the validated single-device FOCAS injection path is unchanged (the relevant `DriverHostActorDiscoveryTests`/end-to-end test passes untouched). Report counts. Do NOT run a full-solution `dotnet test` (net48 Wonderware testhost can't run on macOS).
|
||||
|
||||
No commit (verification). Live wonder re-validation is optional + user-gated.
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-26-otopcua-fixedtree-followups.md",
|
||||
"tasks": [
|
||||
{"id": 1, "subject": "Task 1: Injectable discovery timeout (A)", "status": "completed", "nativeId": 38, "commits": ["c2c368dc"]},
|
||||
{"id": 2, "subject": "Task 2: RediscoverPolicy enum + driver overrides (B1)", "status": "completed", "nativeId": 39, "commits": ["a378b572", "efbdaf85"]},
|
||||
{"id": 3, "subject": "Task 3: DriverInstanceActor honors policy (B2)", "status": "completed", "blockedBy": [1, 2], "nativeId": 40, "commits": ["ce34816a", "a1a655e6"]},
|
||||
{"id": 4, "subject": "Task 4: TriggerRediscovery message + handler (C1)", "status": "completed", "blockedBy": [3], "nativeId": 41, "commits": ["f7358bf4", "e7d5ebe9"]},
|
||||
{"id": 5, "subject": "Task 5: EquipmentNode DriverInstanceId/DeviceId/DeviceHost (E projection)", "status": "completed", "nativeId": 42, "commits": ["cb7ce7f1", "915492a7"]},
|
||||
{"id": 6, "subject": "Task 6: DriverHostActor cache-as-dict + driver-level resolution (E1)", "status": "completed", "blockedBy": [5], "nativeId": 43, "commits": ["adcd7b57"]},
|
||||
{"id": 7, "subject": "Task 7: Re-trigger discovery on rebind drop (C2)", "status": "completed", "blockedBy": [4, 6], "nativeId": 44, "commits": ["53367148", "cde16063"]},
|
||||
{"id": 8, "subject": "Task 8: Single SetDesiredSubscriptions per redeploy (D)", "status": "completed", "blockedBy": [7], "nativeId": 45, "commits": ["05c82079", "51721df5"]},
|
||||
{"id": 9, "subject": "Task 9: Multi-device-per-driver partition (E2)", "status": "completed", "blockedBy": [5, 8], "nativeId": 46, "commits": ["50f08635", "0074f37a"]},
|
||||
{"id": 10, "subject": "Task 10: Docs — follow-up notes + statuses", "status": "completed", "blockedBy": [9], "nativeId": 47},
|
||||
{"id": 11, "subject": "Task 11: Build + offline suite + regression gate", "status": "pending", "blockedBy": [2, 4, 9, 10], "nativeId": 48}
|
||||
],
|
||||
"nativeTaskIds": {
|
||||
"1": 38, "2": 39, "3": 40, "4": 41, "5": 42, "6": 43,
|
||||
"7": 44, "8": 45, "9": 46, "10": 47, "11": 48
|
||||
},
|
||||
"lastUpdated": "2026-06-26T00:00:00Z",
|
||||
"status": "code+docs complete; final build/suite gate pending",
|
||||
"branch": "feat/focas-fixedtree-equipment-injection"
|
||||
}
|
||||
@@ -0,0 +1,220 @@
|
||||
# OtOpcUa ↔ HistorianGateway Historian Backend — Design
|
||||
|
||||
**Date:** 2026-06-26
|
||||
**Status:** Design approved; implementation in two plans (see end).
|
||||
**Repos:** `~/Desktop/HistorianGateway` (gateway + client lib), `~/Desktop/OtOpcUa` (OPC UA server consumer)
|
||||
|
||||
---
|
||||
|
||||
## 1. Goal
|
||||
|
||||
Make **HistorianGateway** the historian read/write backend for the **OtOpcUa** OPC UA server,
|
||||
serving two distinct use cases:
|
||||
|
||||
1. **Read of historic values for mxaccessgw-served (Galaxy) tags & alarms.** Galaxy tags are
|
||||
already historized by AVEVA's own IOServer/AppServer pipeline; OtOpcUa serves their history
|
||||
to OPC UA `HistoryRead` clients by reading them back through the gateway.
|
||||
2. **Full read/write historian backend for non-mxaccessgw tags & alarms** (Modbus / S7 / AB /
|
||||
TwinCAT / FOCAS / scripted-alarm sources). These are *not* historized by AVEVA, so OtOpcUa
|
||||
**records** their live value changes and alarm events into the historian through the gateway,
|
||||
then reads them back through the same path.
|
||||
|
||||
The vehicle, per decision, is a **dedicated .NET gRPC client library** for the gateway —
|
||||
`ZB.MOM.WW.HistorianGateway.Client` — built "similar to the mxaccessgw client"
|
||||
(`ZB.MOM.WW.MxGateway.Client`), which OtOpcUa consumes as a Gitea-feed package.
|
||||
|
||||
## 2. Locked decisions (from brainstorming)
|
||||
|
||||
| Decision | Choice |
|
||||
|---|---|
|
||||
| Write model for non-Galaxy tags | **Continuous historization** — OtOpcUa records live value changes automatically |
|
||||
| Relation to existing Wonderware TCP-sidecar backend | **Replace it** — gateway becomes the sole historian backend; retire the Wonderware driver projects |
|
||||
| Alarm/event history | **In scope for v1** — `HistoryReadEvents` from the gateway + route OtOpcUa alarm events to `SendEvent` |
|
||||
| Client library location & consumption | **In the gateway repo (`clients/dotnet/`), published to the Gitea feed**; OtOpcUa references Contracts + Client as packages (mirrors how it already consumes `ZB.MOM.WW.GalaxyRepository @ 0.2.0`) |
|
||||
| Continuous-historization durability | **Mirror the gateway's StoreForward design** — an OtOpcUa-side crash-safe FasterLog append-only outbox (so values buffer durably when the *gateway itself* is unreachable) |
|
||||
| Deliverable | **One design doc, two implementation plans** (gateway-client plan; OtOpcUa-integration plan) |
|
||||
|
||||
## 3. Why this is tractable — the seams already exist
|
||||
|
||||
OtOpcUa's historian integration was designed for pluggable backends. The gateway slots into
|
||||
seams that are already in place; only two genuinely-new pieces are required (the recorder and tag
|
||||
provisioning).
|
||||
|
||||
| OtOpcUa seam | File | Role for us |
|
||||
|---|---|---|
|
||||
| `IHistorianDataSource` | `src/Core/…Core.Abstractions/Historian/IHistorianDataSource.cs` | Read surface (`ReadRaw/ReadProcessed/ReadAtTime/ReadEvents` + `GetHealthSnapshot`); wired into the NodeManager's `HistoryReadRawModified`/`HistoryReadEvents` overrides |
|
||||
| `IAlarmHistorianWriter` | `src/Core/…Core.AlarmHistorian/IAlarmHistorianSink.cs` | Alarm-event write surface (`WriteBatchAsync(batch)`); already fronted by `SqliteStoreAndForwardSink` |
|
||||
| `AddServerHistorian(cfg, factory)` | `src/Server/…Runtime/ServiceCollectionExtensions.cs` | Generic over `Func<ServerHistorianOptions, IServiceProvider, IHistorianDataSource>` — swap the factory, zero change to Runtime/OpcUaServer |
|
||||
| `AddAlarmHistorian(cfg, writerFactory)` | same | Generic over the `IAlarmHistorianWriter` factory — swap to the gateway writer |
|
||||
| `DependencyMuxActor` | `src/Server/…Runtime/VirtualTags/DependencyMuxActor.cs` | Value-change fan-out (`RegisterInterest` + `AttributeValuePublished`) — the tap point for continuous historization |
|
||||
| `AddressSpaceApplier.Apply()` | `src/Server/…OpcUaServer/AddressSpaceApplier.cs` | Per-tag iteration over `plan.AddedEquipmentTags.Where(IsHistorized)` — the hook for `EnsureTags` provisioning |
|
||||
|
||||
**Currently** these seams are filled by `WonderwareHistorianClient` (a single class implementing
|
||||
both `IHistorianDataSource` and `IAlarmHistorianWriter` over a bespoke **TCP FrameChannel** to an
|
||||
ArchestrA-SDK sidecar) — exactly the COM-bound approach HistorianGateway was built to replace.
|
||||
|
||||
## 4. Gateway gRPC surface vs. OtOpcUa needs
|
||||
|
||||
The gateway's `historian_gateway.v1` contract already covers the surface. Mapping:
|
||||
|
||||
| OtOpcUa need | Gateway RPC | Notes |
|
||||
|---|---|---|
|
||||
| `ReadRawAsync` | `HistorianRead.ReadRaw` (stream) | direct |
|
||||
| `ReadProcessedAsync` | `HistorianRead.ReadAggregate` (stream) | `HistoryAggregateType` → `RetrievalMode` mapping (§6) |
|
||||
| `ReadAtTimeAsync` | `HistorianRead.ReadAtTime` (unary) | direct |
|
||||
| `ReadEventsAsync` | `HistorianRead.ReadEvents` (stream) | needs gateway `RuntimeDb:EventReadsEnabled=true` (C2 SQL path) + source-name filter (gateway gap §5) |
|
||||
| continuous value write | `HistorianWrite.WriteLiveValues` | SQL live path; needs gateway `RuntimeDb:Enabled=true`; numeric/analog only (§6 write-status table; recorder in §7) |
|
||||
| alarm event write | `HistorianWrite.SendEvent` | maps `AlarmHistorianEvent` → `HistorianEvent` |
|
||||
| tag provisioning | `HistorianTags.EnsureTags` | `DriverDataType` → `HistorianDataType` mapping (§6) |
|
||||
| health/diagnostics | `HistorianStatus.Probe` / `GetConnectionStatus` | feeds `GetHealthSnapshot()` |
|
||||
|
||||
Galaxy hierarchy browse (`GalaxyRepository` service) is **not** needed here — OtOpcUa already gets
|
||||
Galaxy hierarchy via mxaccessgw's `GalaxyRepositoryClient`.
|
||||
|
||||
## 5. What gets added to HistorianGateway
|
||||
|
||||
1. **`ZB.MOM.WW.HistorianGateway.Client`** (NEW, `clients/dotnet/`). Clones the `MxGatewayClient`
|
||||
pattern: `HistorianGatewayClient.Create(options)` owning a `GrpcChannel` over a
|
||||
`SocketsHttpHandler` (TLS, connect timeout), Polly resilience pipeline (retry transient codes
|
||||
only), `histgw_<id>_<secret>` bearer key attached in the `authorization` metadata header,
|
||||
typed exception hierarchy, and wrappers for all five services (unary → `Task<T>`, streaming →
|
||||
`IAsyncEnumerable<T>`). Packable NuGet, references the Contracts project.
|
||||
2. **Make `ZB.MOM.WW.HistorianGateway.Contracts` packable + publish to the Gitea feed** (it has no
|
||||
packaging props today). Mirrors `ZB.MOM.WW.MxGateway.Contracts @ 0.1.x`. This is what lets the
|
||||
Client and OtOpcUa consume generated `historian_gateway.v1` types as a package.
|
||||
3. **SQL `ReadEvents` source-name filter** (small enhancement, coordinated with the in-flight
|
||||
`feat/sql-readevents` branch). The SQL event-read path is currently time-range-only
|
||||
(per-property filter → `Unimplemented`); add `Source_Object` filtering so OtOpcUa's
|
||||
`ReadEventsAsync(sourceName, …)` is server-filtered rather than full-window + client-side filter.
|
||||
4. **Optional smoke CLI** (`…Client.Cli`) mirroring `mxgw` cli — manual live checks.
|
||||
5. **Deployment/config prerequisites** (no code): the gateway OtOpcUa points at must run with
|
||||
`RuntimeDb:Enabled=true` (WriteLiveValues) **and** `RuntimeDb:EventReadsEnabled=true` (alarm
|
||||
reads). Provision an API key carrying `historian:read`, `historian:write`,
|
||||
`historian:tags:write`.
|
||||
|
||||
## 6. Mapping tables (single source of truth for the mappers)
|
||||
|
||||
**`HistoryAggregateType` (OPC UA) → `RetrievalMode` (gateway).** Mirror the existing
|
||||
`WonderwareHistorianClient.ReadProcessedAsync` mapping as the authoritative reference; expected:
|
||||
|
||||
| `HistoryAggregateType` | `RetrievalMode` |
|
||||
|---|---|
|
||||
| `Average` | `TimeWeightedAverage` |
|
||||
| `Minimum` | `MinimumWithTime` |
|
||||
| `Maximum` | `MaximumWithTime` |
|
||||
| `Total` | `Integral` |
|
||||
| `Count` | `Counter` *(verify against Wonderware client; may have no exact native mode)* |
|
||||
|
||||
**`DriverDataType` (OtOpcUa) → `HistorianDataType` (gateway), for `EnsureTags`/`WriteLiveValues`.**
|
||||
Constrained by which writes are server-proven (CLAUDE.md: write-captured = Int1/2/4/8, UInt4/8,
|
||||
Float, Double):
|
||||
|
||||
| `DriverDataType` | `HistorianDataType` | Write status |
|
||||
|---|---|---|
|
||||
| `Boolean` | `Int1` | proven |
|
||||
| `Int16` | `Int2` | proven |
|
||||
| `Int32` | `Int4` | proven |
|
||||
| `Int64` | `Int8` | proven |
|
||||
| `UInt16` | `UInt4` *(fallback — UInt2 write is deferred upstream)* | proven via fallback |
|
||||
| `UInt32` | `UInt4` | proven |
|
||||
| `UInt64` | `UInt8` | proven |
|
||||
| `Float32` | `Float` | proven |
|
||||
| `Float64` | `Double` | proven |
|
||||
| `String` | `SingleByteString` | **deferred — gated upstream; not historized in v1** |
|
||||
| `DateTime` | `FileTime` | **deferred — not on the analog write path** |
|
||||
| `Reference` | (string) | **deferred** |
|
||||
|
||||
**`HistorianSample` → `DataValueSnapshot`:** `Value` ← numeric/string value; `StatusCode` ←
|
||||
quality translated to OPC UA status (reuse Wonderware client's quality translation);
|
||||
`SourceTimestampUtc` ← sample timestamp; `ServerTimestampUtc` ← received/processing time.
|
||||
|
||||
**`HistorianEvent` → `HistoricalEvent`:** `EventId` ← id; `SourceName` ← source_name;
|
||||
`EventTimeUtc` ← event_time; `ReceivedTimeUtc` ← received_time; `Message` ← properties (rendered);
|
||||
`Severity` ← properties (Priority/Severity) mapped to OPC UA 1–1000.
|
||||
|
||||
**`AlarmHistorianEvent` → `HistorianEvent` (SendEvent):** `source_name` ← `EquipmentPath`;
|
||||
`event_time` ← `TimestampUtc`; `type` ← `AlarmTypeName`; rich fields (`AlarmName`, `EventKind`,
|
||||
`Severity`, `User`, `Comment`, `Message`) carried in the `properties` map.
|
||||
|
||||
## 7. New OtOpcUa components
|
||||
|
||||
```
|
||||
NEW src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway/
|
||||
GatewayHistorianDataSource : IHistorianDataSource — read adapter over the client
|
||||
GatewayAlarmHistorianWriter : IAlarmHistorianWriter — SendEvent; behind existing SqliteStoreAndForwardSink
|
||||
GatewayTagProvisioner : IHistorianProvisioning — EnsureTags (NEW interface)
|
||||
Mappers — the §6 tables, with matrix-guard unit tests
|
||||
NEW ContinuousHistorizationRecorder (Runtime actor + FasterLog outbox)
|
||||
- registers interest (RegisterInterest) with DependencyMuxActor for historized non-Galaxy tag refs
|
||||
- appends each AttributeValuePublished to a crash-safe FasterLog outbox (PerEntry/Periodic
|
||||
commit, mirroring the gateway's FasterLogOutboxStore)
|
||||
- background drainer batches → client.WriteLiveValues; commits/truncates on ack; backoff on
|
||||
failure; outbox-full → drop-oldest + metric
|
||||
HOOK AddressSpaceApplier.Apply() — for plan.AddedEquipmentTags.Where(IsHistorized) →
|
||||
provisioner.EnsureTags (non-blocking; failures logged + counted, never block publish)
|
||||
SWAP Program.cs — AddServerHistorian + AddAlarmHistorian factories construct the Gateway-backed impls
|
||||
CONF ServerHistorian options reshaped to gateway form (Endpoint / ApiKey / Tls); drop SharedSecret
|
||||
RETIRE src/Drivers/*Wonderware* (3 src + 2 test projects) after live validation
|
||||
```
|
||||
|
||||
## 8. Data flow
|
||||
|
||||
**Use case 1 — Galaxy tag history read:**
|
||||
`UA HistoryRead → OtOpcUaNodeManager.HistoryReadRawModified → GatewayHistorianDataSource.ReadRaw →
|
||||
client.ReadRaw → gateway → AVEVA historian (already historized by AVEVA IOServer)`.
|
||||
|
||||
**Use case 2 — non-Galaxy tag record + read:**
|
||||
- *Provision (deploy):* `AddressSpaceApplier.Apply → GatewayTagProvisioner.EnsureTags → client.EnsureTags → gateway`.
|
||||
- *Record (runtime):* `driver value change → DriverInstanceActor.AttributeValuePublished → DependencyMuxActor → ContinuousHistorizationRecorder → FasterLog outbox → drainer → client.WriteLiveValues → gateway (SQL live path)`.
|
||||
- *Read back:* same path as use case 1.
|
||||
- *Alarms:* `ScriptedAlarmEngine → HistorianAdapterActor → SqliteStoreAndForwardSink → GatewayAlarmHistorianWriter.WriteBatchAsync → client.SendEvent`; alarm-history read via `GatewayHistorianDataSource.ReadEvents → client.ReadEvents`.
|
||||
|
||||
## 9. Error handling
|
||||
|
||||
- **Client:** `RpcException` → typed hierarchy (`HistorianGatewayException`,
|
||||
`…AuthenticationException`/`Unauthenticated`, `…AuthorizationException`/`PermissionDenied`,
|
||||
`…UnavailableException`/`Unavailable`). Polly retries transient codes only.
|
||||
- **Read adapter:** quality → OPC UA `StatusCode` inside the data source; empty windows are not
|
||||
faults; backend errors surface as `Bad` snapshots and never crash a `HistoryRead`.
|
||||
- **Provisioning:** non-blocking — log + count failures; address-space publish always proceeds.
|
||||
- **Recorder:** append-to-outbox is the durable boundary; drain failures back off; outbox-full →
|
||||
drop-oldest + metric; health via `GetHealthSnapshot` + meter.
|
||||
|
||||
## 10. Testing
|
||||
|
||||
- **Client lib:** fake-transport unit tests (clone mxaccessgw `FakeGatewayTransport`) — auth-header
|
||||
attach, retry, streaming, exception mapping; golden proto round-trips; smoke CLI.
|
||||
- **OtOpcUa adapter:** unit tests with a fake `IHistorianGatewayClient` for every §6 mapper
|
||||
(matrix-guard so a new enum member fails the build); recorder tested against fake outbox + fake
|
||||
client (batch/drain/outage/drop); provisioning hook over a synthetic plan.
|
||||
- **Live (env-gated, skips without VPN):** reuse the `wonder-sql-vd03` fixture — Galaxy-tag read
|
||||
round-trip; write→read round-trip on a `HistGW.LiveTest.*` tag; alarm `SendEvent`→`ReadEvents`.
|
||||
|
||||
## 11. Verify-live risks (settle during implementation, not now)
|
||||
|
||||
1. **Galaxy-tag → historian-tag identity** — does OtOpcUa's `historianTagname`
|
||||
(`tag_name.Attribute`) match the AVEVA historian tag name? Confirm against `wonder-sql-vd03` early.
|
||||
2. **UInt16 / String / DateTime write gaps** — continuous historization is numeric-analog only in
|
||||
v1; documented mappings/fallbacks in §6, not silent drops.
|
||||
3. **Alarm-history reads depend on `feat/sql-readevents`** landing + gateway
|
||||
`RuntimeDb:EventReadsEnabled=true`; the source-name filter (§5.3) is the one coordinated gateway
|
||||
enhancement.
|
||||
4. **`WriteLiveValues` requires gateway `RuntimeDb:Enabled=true`** and an `EnsureTags`-provisioned tag.
|
||||
5. **`received_time` UTC semantics** on the SQL event/value paths (local vs UTC; `EventTimeUTCOffsetMins`)
|
||||
— inherit whatever the `feat/sql-readevents` work establishes.
|
||||
|
||||
## 12. Implementation plans
|
||||
|
||||
- **Plan 1 — Gateway client (`docs/plans/2026-06-26-historian-gateway-client.md`):** Contracts
|
||||
packable + publish → client options/channel/auth → Polly + exception mapping → per-service
|
||||
wrappers → fake-transport tests → CLI → SQL-ReadEvents source filter (coordinated) → live smoke.
|
||||
- **Plan 2 — OtOpcUa integration (`docs/plans/2026-06-26-otopcua-historian-gateway-integration.md`):**
|
||||
new Gateway driver project → mappers (matrix-guard) → read adapter + `AddServerHistorian` swap →
|
||||
alarm writer adapter + `AddAlarmHistorian` swap → `ReadEvents` alarm-history → continuous-
|
||||
historization recorder (FasterLog outbox) → `EnsureTags` provisioning hook → retire Wonderware →
|
||||
live-validate. (Authored here; relocates into `~/Desktop/OtOpcUa/docs/plans/` on its own branch
|
||||
when that phase starts, to avoid entangling OtOpcUa's current in-flight working tree.)
|
||||
|
||||
Plan 1 is a prerequisite for Plan 2 (OtOpcUa consumes the published Client package). Within Plan 2,
|
||||
the read path (phases through `AddServerHistorian` swap) is independently shippable and validates
|
||||
use case 1 before any write code lands.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"planPath": "docs/plans/2026-06-26-otopcua-historian-gateway-integration.md",
|
||||
"tasks": [
|
||||
{ "id": 0, "subject": "Task 1: Consume gateway packages + scaffold Gateway driver project", "status": "pending", "blockedBy": [] },
|
||||
{ "id": 1, "subject": "Task 2: HistoryAggregateType->RetrievalMode mapper (matrix-guarded)", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 2, "subject": "Task 3: DriverDataType->HistorianDataType mapper + write-gap fallbacks (matrix-guarded)", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 3, "subject": "Task 4: HistorianSample/Aggregate->DataValueSnapshot + quality mapper", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 4, "subject": "Task 5: HistorianEvent->HistoricalEvent mapper (+ severity)", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 5, "subject": "Task 6: AlarmHistorianEvent->HistorianEvent mapper (SendEvent)", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 6, "subject": "Task 7: GatewayHistorianDataSource read paths (raw/processed/at-time)", "status": "pending", "blockedBy": [1, 3] },
|
||||
{ "id": 7, "subject": "Task 8: GetHealthSnapshot via Probe/GetConnectionStatus", "status": "pending", "blockedBy": [6] },
|
||||
{ "id": 8, "subject": "Task 9: Reshape ServerHistorianOptions to gateway form", "status": "pending", "blockedBy": [0] },
|
||||
{ "id": 9, "subject": "Task 10: Swap AddServerHistorian factory in Program.cs (READ CUTOVER)", "status": "pending", "blockedBy": [6, 8] },
|
||||
{ "id": 10, "subject": "Task 11: ReadEventsAsync alarm-history on the data source", "status": "pending", "blockedBy": [6, 4] },
|
||||
{ "id": 11, "subject": "Task 12: GatewayAlarmHistorianWriter (SendEvent + outcome mapping)", "status": "pending", "blockedBy": [9, 5] },
|
||||
{ "id": 12, "subject": "Task 13: Swap AddAlarmHistorian factory in Program.cs", "status": "pending", "blockedBy": [11] },
|
||||
{ "id": 13, "subject": "Task 14: IHistorianProvisioning + GatewayTagProvisioner (EnsureTags)", "status": "pending", "blockedBy": [9, 2] },
|
||||
{ "id": 14, "subject": "Task 15: Hook provisioning into AddressSpaceApplier.Apply()", "status": "pending", "blockedBy": [13] },
|
||||
{ "id": 15, "subject": "Task 16: FasterLog historization outbox store", "status": "pending", "blockedBy": [9] },
|
||||
{ "id": 16, "subject": "Task 17: ContinuousHistorizationRecorder actor", "status": "pending", "blockedBy": [15, 9] },
|
||||
{ "id": 17, "subject": "Task 18: Wire recorder into DI + hosted lifecycle", "status": "pending", "blockedBy": [16] },
|
||||
{ "id": 18, "subject": "Task 19: Retire Wonderware historian projects", "status": "pending", "blockedBy": [9, 12, 17, 19] },
|
||||
{ "id": 19, "subject": "Task 20: Env-gated live validation vs wonder-sql-vd03", "status": "pending", "blockedBy": [9, 10, 12, 17] },
|
||||
{ "id": 20, "subject": "Task 21: Documentation (CLAUDE.md, appsettings, README)", "status": "pending", "blockedBy": [18] }
|
||||
],
|
||||
"lastUpdated": "2026-06-26"
|
||||
}
|
||||
@@ -0,0 +1,200 @@
|
||||
# OtOpcUa ↔ HistorianGateway — Follow-up & Deferred Items
|
||||
|
||||
**Status:** the 21-task integration (`feat/historian-gateway-backend`, Gitea PR
|
||||
[#423](https://gitea.dohertylan.com/dohertj2/lmxopcua/pulls/423)) + the continuous-historization
|
||||
ref-feed are complete and **live-validated** against `wonder-sql-vd03`. The offline suite is green;
|
||||
the live `Category=LiveIntegration` suite is green (read ✅, write-persist ✅, alarm-send ✅,
|
||||
alarm-readback ⏭ skip). This doc tracks everything deliberately deferred or surfaced during
|
||||
validation, with the **owning repo** for each.
|
||||
|
||||
> **Execution update (2026-06-27 — this follow-up pass):**
|
||||
> - **FU-1 — RESOLVED as a documented protocol limitation** (NOT a fixable gateway bug): the captured
|
||||
> CM_EVENT event-send wire never carries `SourceName`, so `Source_Object` cannot be populated by the
|
||||
> gateway. Recorded as `pending.md` **C4** + a CLAUDE.md note in the HistorianGateway repo (commit
|
||||
> `174a4a9` on `fix/gateway-otopcua-followups`). The OtOpcUa live test stays skipped with the corrected
|
||||
> reason. See FU-1 below for the (now-confirmed) root cause.
|
||||
> - **FU-2 — ✅ DONE + live-validated** in HistorianGateway (`fix/gateway-otopcua-followups`, commits
|
||||
> `150868c` + `1c2d11d`). The SQL live-write path converts UTC→server-local in-SQL via
|
||||
> `DATEADD(MINUTE, DATEPART(TZOFFSET, SYSDATETIMEOFFSET()), @dt)`; an explicit-timestamp round-trip is
|
||||
> now EXACT against the live historian (delta 00:00:00).
|
||||
> - **FU-3 — ✅ DONE** in OtOpcUa (this branch, commit `111adc92`): `HistorizedTagRef(MuxRef, HistorianName)`
|
||||
> carried through the sink/recorder; interest registered by mux ref, values written under the historian
|
||||
> name. Recorder + applier tests green.
|
||||
> - **FU-4 — ✅ DONE** in OtOpcUa (this branch, commit `b2276b5b`).
|
||||
> - **FU-5** — still pre-existing/not-ours (tracked below). **FU-6** — still pending the merges.
|
||||
|
||||
**Live-validation harness recap (how to reproduce any of the live findings below):** run the
|
||||
HistorianGateway locally against the live historian, then point the OtOpcUa live tests (or `grpcurl`)
|
||||
at it. The gateway boots from env-var config (secrets from `~/.zshenv`):
|
||||
|
||||
```
|
||||
ASPNETCORE_ENVIRONMENT=Development
|
||||
Historian__Host=$HISTORIAN_GRPC_HOST Historian__Port=32565 Historian__GrpcUseTls=true
|
||||
Historian__UserName=$HISTORIAN_USER Historian__Password=$HISTORIAN_PASSWORD
|
||||
Historian__AllowUntrustedServerCertificate=true
|
||||
Galaxy__ConnectionString=$GALAXY_SQL_CONNECTION
|
||||
RuntimeDb__Enabled=true RuntimeDb__EventReadsEnabled=true
|
||||
RuntimeDb__ConnectionString="Server=$HISTORIAN_GRPC_HOST;Database=Runtime;User Id=$HISTORIAN_SQL_USER;Password=$HISTORIAN_SQL_PASSWORD;TrustServerCertificate=true;Encrypt=false"
|
||||
ApiKeys__Mode=Disabled
|
||||
# dotnet run the Server → gRPC h2c on localhost:5221, HTTP on :5220 (/healthz, /health/ready)
|
||||
```
|
||||
OtOpcUa live tests then read `HISTGW_GATEWAY_ENDPOINT=http://localhost:5221` +
|
||||
`HISTGW_GATEWAY_APIKEY=<any>` + `HISTGW_TEST_TAG`/`HISTGW_WRITE_SANDBOX_TAG`/`HISTGW_ALARM_SOURCE`.
|
||||
Direct SQL: `Runtime.dbo.Events` is an **INSQL linked-server view that rejects untimed queries** —
|
||||
always include an `EventTimeUtc` range. `sqlcmd -S $HISTORIAN_GRPC_HOST -d Runtime -U $HISTORIAN_SQL_USER -C`
|
||||
(password via `SQLCMDPASSWORD`).
|
||||
|
||||
---
|
||||
|
||||
## Priority 1 — Gateway-side bugs that block OtOpcUa write/read use cases
|
||||
**Owning repo: `~/Desktop/HistorianGateway` (HistorianGateway).** OtOpcUa code is correct for both;
|
||||
these are gateway defects that gate the "write OtOpcUa's own data, read it back" use case.
|
||||
|
||||
### FU-1 — `SendEvent` does not populate `Source_Object` — ✅ RESOLVED as a documented protocol limitation (2026-06-27)
|
||||
> **Outcome:** root-caused and confirmed **not fixable at the gateway** — the captured CM_EVENT event-send
|
||||
> wire (`HistorianEventWriteProtocol.SerializeEventValueBlob`) serializes Namespace/Type/properties but
|
||||
> **never `SourceName`** (the gateway threads it correctly; the wire drops it). `Source_Object` is a
|
||||
> Galaxy-platform association for object-raised events. Documented as `pending.md` **C4** + a CLAUDE.md note
|
||||
> in HistorianGateway; likely won't-fix (would need new wire-capture evidence in `histsdk` — vendored
|
||||
> sources aren't hand-edited). The "Investigation/Proposed fix" below is retained for the record; option 1
|
||||
> is now known to be infeasible.
|
||||
|
||||
**Symptom (live-proven):** OtOpcUa's `GatewayAlarmHistorianWriter.SendEvent` of an event with
|
||||
`source_name="HistGW.LiveTest.AlarmSource"` **acks** and **lands in `Runtime.dbo.Events`** with the
|
||||
correct `Type` (`LimitAlarm`) and `EventTimeUtc` (no shift) — but with **`Source_Object = NULL`** (and
|
||||
all other `Source_*`/`Provider_*` columns null). The gateway's `SqlEventReader` filters
|
||||
`WHERE Source_Object = @source`, so a source-filtered `ReadEvents` of a just-sent event returns 0.
|
||||
|
||||
**What works (so this is narrow, not "C2 won't-fix"):**
|
||||
- Time-only `ReadEvents` (no source filter) returns events (50 in a 2-day window during validation).
|
||||
- Source-filtered `ReadEvents` for a **real Galaxy event source** (`TableAlarms_006`) returns its
|
||||
history (`System.Deploy`/`Undeploy`/`Alarm.Set`, each with `source_name` populated). So the SQL
|
||||
reader + source filter are functional; only **ad-hoc SendEvents lack a `Source_Object`.**
|
||||
- ⇒ **Reading existing Galaxy alarm/event history by source already works** (the mxaccessgw read use
|
||||
case). Only round-tripping OtOpcUa's *own* sends by source is blocked.
|
||||
|
||||
**Investigation (gateway repo):**
|
||||
- Read the v8 event-send path: `RegisterCmEventTag` + the `ConnectionType=Event` send (CM_EVENT). Find
|
||||
where the event's source/tag is set on the wire payload and whether the historian maps any send-side
|
||||
field → the `Events.Source_Object` column. Start at the gateway `SendEvent` service + the vendored
|
||||
`AVEVA.Historian.Client` event session (`HistorianEventSession`), and the
|
||||
`event-session-reuse-spike` notes in `../histsdk/docs/reverse-engineering/`.
|
||||
- Determine whether the historian's CM_EVENT API even *allows* setting a `Source_Object` for an event
|
||||
not raised by a Galaxy object. If the source must be a registered event-tag/source name, decide how
|
||||
OtOpcUa's `EquipmentPath` should map to it.
|
||||
|
||||
**Proposed fix (one of):**
|
||||
1. If the send payload has a source/tag field that maps to `Source_Object`: populate it from the event's
|
||||
`source_name` in the gateway `SendEvent` handler. (Preferred — makes write-back-by-source work.)
|
||||
2. If the historian cannot carry a source for ad-hoc events: document it, and have the gateway's
|
||||
`SqlEventReader` optionally match the source in a fallback column the send *does* populate (if any),
|
||||
or expose a "read all events in window, filter client-side" mode. Update OtOpcUa's
|
||||
`GatewayHistorianDataSource.ReadEventsAsync` defensive client-side source filter accordingly (it
|
||||
currently drops events whose mapped `SourceName` ≠ requested source — which would also drop
|
||||
source-less sends even if the server returned them).
|
||||
|
||||
**Acceptance:** an OtOpcUa `SendEvent(source=X)` is readable back via `ReadEvents(source=X)` within the
|
||||
window. Then **un-skip** `Alarm_SendEvent_then_ReadEvents` in
|
||||
`tests/Drivers/.../Live/GatewayLiveIntegrationTests.cs` (it currently `Assert.Skip`s on a 0-result with
|
||||
the accurate reason).
|
||||
|
||||
### FU-2 — `WriteLiveValues` shifts an explicit timestamp by the local↔UTC offset (~+4h) — ✅ DONE + live-validated (2026-06-27)
|
||||
> **Outcome:** fixed in HistorianGateway (`fix/gateway-otopcua-followups`). The SQL live-write path now
|
||||
> converts UTC→server-local in-SQL via `DATEADD(MINUTE, DATEPART(TZOFFSET, SYSDATETIMEOFFSET()), @dt)` (a
|
||||
> single atomic offset read). An explicit-timestamp round-trip (real SQL write → gateway UTC ReadRaw) is now
|
||||
> EXACT against the live 2023 R2 historian (delta 00:00:00); an offline unit test locks in the exact conversion
|
||||
> expression. The OtOpcUa live write test can now be tightened (see acceptance).
|
||||
|
||||
**Symptom (live-proven, reproduces via raw `grpcurl` — no OtOpcUa code involved):** a `WriteLiveValues`
|
||||
with an **explicit** `timestamp=2026-06-27T03:45:00Z` lands in the historian at
|
||||
`2026-06-27T07:45:00Z` (+4h = the deployment's local↔UTC delta). A **server-stamped** write (null
|
||||
timestamp) lands correctly at the gateway's UTC now. The OtOpcUa value-writer sends correct UTC
|
||||
(`Timestamp.FromDateTime(SpecifyKind(ts, Utc))`), so the shift is in the gateway's SQL write path.
|
||||
|
||||
**Impact:** the continuous-historization recorder writes the driver's **source** timestamp (explicit),
|
||||
so historized values would carry timestamps offset by the host's UTC offset until fixed. (The OtOpcUa
|
||||
live write test currently uses a ±12h tz-tolerant readback window to validate *persistence* around
|
||||
this — see FU-2 acceptance.)
|
||||
|
||||
**Investigation (gateway repo):** `SqlLiveValueWriter` (the `aaAnalogTagInsert` + `INSERT INTO History`
|
||||
path). Inspect which `History` DateTime column is written (local vs `*UTC`) and the conversion applied
|
||||
to the incoming proto UTC `Timestamp`. The +4h (value lands *later* than supplied UTC) is consistent
|
||||
with writing a UTC value into a **local** column that `ReadRaw` then converts local→UTC, on a server
|
||||
whose offset is −4h (EDT). Compare against the **server-stamped** path (which is correct) to see what
|
||||
conversion the explicit path is missing.
|
||||
|
||||
**Proposed fix:** convert the supplied UTC timestamp to the historian server's local time before the
|
||||
`History` insert (or write the UTC-typed column), so an explicit UTC timestamp round-trips unchanged.
|
||||
Add a gateway unit/live test: write explicit `T`, read back, assert the sample timestamp == `T`.
|
||||
|
||||
**Acceptance:** an explicit-timestamp `WriteLiveValues` reads back at the supplied UTC time. Then
|
||||
**tighten** the OtOpcUa live write test (`Write_then_read_on_sandbox_tag`) back to a narrow recent
|
||||
window anchored on the write time.
|
||||
|
||||
---
|
||||
|
||||
## Priority 2 — OtOpcUa-side follow-ups
|
||||
**Owning repo: `~/Desktop/OtOpcUa` (this repo).**
|
||||
|
||||
### FU-3 — Continuous-historization `HistorianTagname` override edge case — ✅ DONE (2026-06-27, commit `111adc92`)
|
||||
> **Outcome:** implemented the "carry both identifiers" fix below. A new `HistorizedTagRef(MuxRef,
|
||||
> HistorianName)` record threads through `IHistorizedTagSubscriptionSink` → the recorder; the recorder keeps
|
||||
> a **muxRef → SET-of-historian-names** map, registers/filters mux interest by `MuxRef` (= driver `FullName`)
|
||||
> but writes under every `HistorianName` (override-or-FullName) sharing that ref. The applier resolves both.
|
||||
> The set (not a single name) closes a code-review **Critical**: one driver ref can back several historized
|
||||
> equipment tags via aliasing (identical machines sharing a register), each with its own override — a single
|
||||
> fan must write ALL of them, not silently drop all but one. Tests: divergent-override, aliased-refs-each-
|
||||
> get-the-value, remove-one-alias-keeps-the-ref, override-rename updates the write target without mux churn;
|
||||
> applier feed tests assert the full pairs. Commits `111adc92` + `60695179` (review fix).
|
||||
|
||||
The `ContinuousHistorizationRecorder` registers `DependencyMuxActor` interest **by the resolved
|
||||
historian name** (`HistorianTagname` override else `FullName`) — the same key the EnsureTags hook and
|
||||
the writer use. The mux fans `DependencyValueChanged` **keyed by `FullReference`** (the driver's
|
||||
published ref). In the **common case (no override)** historian-name == `FullReference`, so it's fully
|
||||
consistent and works (live-validated path is the value writer; mux fan-out is the recorder's input).
|
||||
**When a `HistorianTagname` override is set** (override ≠ `FullReference`), the recorder registers
|
||||
interest under a key the mux never fans → that tag's values are never captured.
|
||||
**Fix options:** register mux interest by `FullReference` (the mux key) while writing to the historian
|
||||
under the resolved historian name — i.e. carry both identifiers through `IHistorizedTagSubscriptionSink`
|
||||
/ the recorder (a `(muxRef, historianName)` pair) instead of a single string. Add a recorder test with
|
||||
a divergent override. **Low urgency** (overrides are uncommon); only matters for non-Galaxy historized
|
||||
tags that set an explicit `HistorianTagname`.
|
||||
|
||||
### FU-4 — `AlarmHistorianOptions.Validate()` `MaxAttempts<=0` test coverage (minor) — ✅ DONE (2026-06-27, commit `b2276b5b`)
|
||||
T19 pruned the Wonderware-shaped fields and reworked `AlarmHistorianRegistrationTests`. The
|
||||
`MaxAttempts <= 0` warning branch in `AlarmHistorianOptions.Validate()` is exercised in prod but not
|
||||
covered by a test (the sibling warnings for `DrainIntervalSeconds`/`Capacity`/`DeadLetterRetentionDays`
|
||||
are). Add a `Validate_warns_on_non_positive_max_attempts` case. Trivial.
|
||||
|
||||
### FU-5 — Pre-existing `Host.IntegrationTests` failure (NOT ours) — ✅ TRACKED via Gitea issue [#424](https://gitea.dohertylan.com/dohertj2/lmxopcua/issues/424) (2026-06-27)
|
||||
> **Outcome:** re-confirmed pre-existing — the test still fails (`Accepted` expected, `Rejected` actual) and
|
||||
> `git diff --stat master..HEAD` shows this branch touches **none** of the Modbus/`DraftValidator`/
|
||||
> `ConfigComposer`/equipment-materialization path. Filed as a standalone tracking issue (#424) so the
|
||||
> `DraftValidator` rejection root-cause is owned separately from the historian work. No code change here.
|
||||
|
||||
`EquipmentNamespaceMaterializationTests.Deploying_an_equipment_namespace_carries_the_signal_into_the_artifact`
|
||||
fails (`Rejected` vs expected `Accepted`) on a **Modbus-only** namespace via `DraftValidator`/
|
||||
`ConfigComposer` — untouched by this branch. **Verified failing identically on `master`** (via
|
||||
`git stash`). Environment/pre-existing; out of scope for the historian work but worth a separate ticket.
|
||||
|
||||
---
|
||||
|
||||
## Priority 3 — Cross-repo propagation (after merges)
|
||||
- **FU-6 — scadaproj index + agent memory.** When PR #423 merges (and the Plan 1 client PR), update
|
||||
`../scadaproj/CLAUDE.md` (the HistorianGateway + OtOpcUa entries) and the agent memory notes
|
||||
(`otopcua-historian-backend`, `scadaproj-umbrella`) to record: OtOpcUa now consumes
|
||||
`ZB.MOM.WW.HistorianGateway.Client` as its historian backend; the Wonderware historian driver was
|
||||
retired; the two gateway follow-ups (FU-1/FU-2). Per the CLAUDE.md cross-repo propagation rule.
|
||||
|
||||
---
|
||||
|
||||
## Already resolved this effort (for the record — do NOT redo)
|
||||
- **Alarm SendEvent event-id bug** — `AlarmEventMapper` set the wire `Id` → gateway handler throws →
|
||||
every alarm send `PermanentFail`. **Fixed** (`44644ddc`): leave `Id` unset, carry the id as an
|
||||
`AlarmId` property. Live-validated (send acks).
|
||||
- **Continuous-historization ref-feed gap** — recorder spawned with an empty ref set. **Closed**
|
||||
(`2982cc4b`): `IHistorizedTagSubscriptionSink` + recorder `UpdateHistorizedRefs(added, removed)`
|
||||
converges mux interest on each `AddressSpaceApplier.Apply()`.
|
||||
- **Read path / use case 1** — live-validated PASS (ReadRaw through `GatewayHistorianDataSource`).
|
||||
- **C2 mis-attribution** — the alarm readback-0 was NOT the "C2 server-gated event reads" limitation;
|
||||
the SQL reader works (see FU-1).
|
||||
@@ -7,6 +7,12 @@
|
||||
# Any divergence from these defaults must be deliberate and recorded in docs/v2/Architecture.md.
|
||||
|
||||
akka {
|
||||
# Akka logger wiring (route ILoggingAdapter → Serilog) is configured via Akka.Hosting's
|
||||
# ConfigureLoggers in ServiceCollectionExtensions.WithOtOpcUaClusterBootstrap — HOCON
|
||||
# `akka.loggers` alone is not honored by Akka.Hosting. logger-startup-timeout is kept here
|
||||
# since the Serilog logger can be slow to initialize at startup.
|
||||
logger-startup-timeout = 30s
|
||||
|
||||
extensions = [
|
||||
"Akka.Cluster.Tools.PublishSubscribe.DistributedPubSubExtensionProvider, Akka.Cluster.Tools"
|
||||
]
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
using Akka.Cluster.Hosting;
|
||||
using Akka.Event;
|
||||
using Akka.Hosting;
|
||||
using Akka.Logger.Serilog;
|
||||
using Akka.Remote.Hosting;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
@@ -53,6 +55,19 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
builder.AddHocon(HoconLoader.LoadBaseConfig(), HoconAddMode.Append);
|
||||
|
||||
// Route Akka's internal ILoggingAdapter (DriverHostActor, DriverInstanceActor, cluster
|
||||
// events, …) into Serilog so those logs reach the same sinks as the MEL/Serilog application
|
||||
// logs. Akka.Hosting owns logger setup, so HOCON `akka.loggers` alone is not honored — the
|
||||
// logger must be registered through ConfigureLoggers. Without this the actor graph logs only
|
||||
// to the default StandardOutLogger (discarded under the Windows service host), which is why
|
||||
// the driver-role actors were invisible during the 2026-06 data-plane investigation.
|
||||
builder.ConfigureLoggers(setup =>
|
||||
{
|
||||
setup.LogLevel = LogLevel.DebugLevel;
|
||||
setup.ClearLoggers();
|
||||
setup.AddLogger<SerilogLogger>();
|
||||
});
|
||||
|
||||
builder.WithRemoting(new RemoteOptions
|
||||
{
|
||||
HostName = options.Hostname,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
<PackageReference Include="Akka.Cluster"/>
|
||||
<PackageReference Include="Akka.Cluster.Hosting"/>
|
||||
<PackageReference Include="Akka.Cluster.Tools"/>
|
||||
<PackageReference Include="Akka.Logger.Serilog"/>
|
||||
<PackageReference Include="Akka.Remote.Hosting"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions"/>
|
||||
|
||||
@@ -70,6 +70,10 @@ public sealed class DeferredAddressSpaceSink : IOpcUaAddressSpaceSink, ISurgical
|
||||
/// <summary>Rebuilds the address space through the inner sink.</summary>
|
||||
public void RebuildAddressSpace() => _inner.RebuildAddressSpace();
|
||||
|
||||
/// <summary>Announces a runtime NodeAdded model-change (discovered-node injection) through the inner sink.</summary>
|
||||
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
|
||||
public void RaiseNodesAddedModelChange(string affectedNodeId) => _inner.RaiseNodesAddedModelChange(affectedNodeId);
|
||||
|
||||
/// <summary>Forwards an in-place tag-attribute update (F10b) to the inner sink when it supports the
|
||||
/// surgical capability. Returns false otherwise — before the real <c>SdkAddressSpaceSink</c> is
|
||||
/// swapped in (inner is still the null sink), or any inner sink that isn't surgical — so the caller
|
||||
|
||||
@@ -84,6 +84,14 @@ public interface IOpcUaAddressSpaceSink
|
||||
/// successful deployment apply so the node manager reflects the new config. Idempotent.
|
||||
/// </summary>
|
||||
void RebuildAddressSpace();
|
||||
|
||||
/// <summary>
|
||||
/// Announce that nodes were added at runtime (discovered-node injection) under
|
||||
/// <paramref name="affectedNodeId"/> so subscribed clients refresh their browse
|
||||
/// (Part 3 GeneralModelChangeEvent, verb NodeAdded).
|
||||
/// </summary>
|
||||
/// <param name="affectedNodeId">The node under which discovered nodes were added.</param>
|
||||
void RaiseNodesAddedModelChange(string affectedNodeId);
|
||||
}
|
||||
|
||||
/// <summary>OPC UA status code projection — Good / Uncertain / Bad. Real SDK has finer-grained
|
||||
@@ -114,4 +122,7 @@ public sealed class NullOpcUaAddressSpaceSink : IOpcUaAddressSpaceSink
|
||||
|
||||
/// <inheritdoc />
|
||||
public void RebuildAddressSpace() { }
|
||||
|
||||
/// <inheritdoc />
|
||||
public void RaiseNodesAddedModelChange(string affectedNodeId) { }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
/// <summary>
|
||||
/// Per-append durability cadence for the historization outbox. Local to the OtOpcUa abstraction
|
||||
/// layer (deliberately decoupled from the gateway's internal store-forward commit-mode type).
|
||||
/// </summary>
|
||||
public enum HistorizationCommitMode
|
||||
{
|
||||
/// <summary>fsync the log before each <c>AppendAsync</c> returns — safest, no loss window.</summary>
|
||||
PerEntry,
|
||||
|
||||
/// <summary>Batch commits onto a background timer — higher throughput, a bounded worst-case loss window.</summary>
|
||||
Periodic,
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
/// <summary>
|
||||
/// One durable record buffered by the continuous-historization outbox before it is written to
|
||||
/// the historian. Carries the minimal payload the SQL analog live-value write path can ingest:
|
||||
/// a numeric value, a quality code, and a UTC timestamp, all keyed by tag.
|
||||
/// </summary>
|
||||
/// <param name="Id">Stable identifier used to ack (remove) the entry once written. Unique per append.</param>
|
||||
/// <param name="Tag">Fully-qualified historian tag name the value is recorded against.</param>
|
||||
/// <param name="NumericValue">The coerced numeric sample value (the SQL write path is numeric-only).</param>
|
||||
/// <param name="Quality">OPC-UA-derived quality code in OPC-DA form (e.g. 192 = Good) carried through to the historian.</param>
|
||||
/// <param name="TimestampUtc">UTC source timestamp of the sample.</param>
|
||||
public sealed record HistorizationOutboxEntry(
|
||||
Guid Id,
|
||||
string Tag,
|
||||
double NumericValue,
|
||||
ushort Quality,
|
||||
DateTime TimestampUtc);
|
||||
@@ -0,0 +1,73 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
/// <summary>
|
||||
/// Server-side historian tag provisioning — ensures the historian knows about the tags the
|
||||
/// address space historizes before values are written. Registered alongside
|
||||
/// <see cref="IHistorianDataSource"/> and invoked by the address-space applier when historized
|
||||
/// nodes are (re)applied.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Provisioning is best-effort and <b>non-blocking</b>: an unreachable or erroring historian
|
||||
/// never fails an address-space apply. Implementations return a
|
||||
/// <see cref="HistorianProvisionResult"/> tally instead of throwing, so the applier can surface
|
||||
/// a count without taking the server down. Non-historizable types are skipped (counted in
|
||||
/// <see cref="HistorianProvisionResult.Skipped"/>), not failed.
|
||||
/// </remarks>
|
||||
public interface IHistorianProvisioning
|
||||
{
|
||||
/// <summary>
|
||||
/// Ensures the supplied historian tags exist (create-or-update). Never throws; a transport or
|
||||
/// backend failure is reported via <see cref="HistorianProvisionResult.Failed"/>.
|
||||
/// </summary>
|
||||
/// <param name="requests">The tags to ensure, with their driver data type and optional metadata.</param>
|
||||
/// <param name="ct">A cancellation token for the operation.</param>
|
||||
/// <returns>A tally of how the requests were handled.</returns>
|
||||
Task<HistorianProvisionResult> EnsureTagsAsync(
|
||||
IReadOnlyList<HistorianTagProvisionRequest> requests, CancellationToken ct);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// A single historian tag to ensure — the driver-agnostic shape the applier hands to
|
||||
/// <see cref="IHistorianProvisioning.EnsureTagsAsync"/>. A backend maps
|
||||
/// <see cref="DataType"/> onto its native tag type and skips types it cannot historize.
|
||||
/// </summary>
|
||||
/// <param name="TagName">The full reference / tag name to ensure in the historian.</param>
|
||||
/// <param name="DataType">The driver-agnostic data type, used to select the historian tag type.</param>
|
||||
/// <param name="EngineeringUnit">Optional engineering unit (e.g. <c>degC</c>); <c>null</c> when unknown.</param>
|
||||
/// <param name="Description">Optional human-readable description; <c>null</c> when unknown.</param>
|
||||
public sealed record HistorianTagProvisionRequest(
|
||||
string TagName,
|
||||
DriverDataType DataType,
|
||||
string? EngineeringUnit,
|
||||
string? Description);
|
||||
|
||||
/// <summary>
|
||||
/// The tally returned by <see cref="IHistorianProvisioning.EnsureTagsAsync"/>. The buckets
|
||||
/// partition the input: <c>Requested == Ensured + Skipped + Failed</c>.
|
||||
/// </summary>
|
||||
/// <param name="Requested">Total tags submitted.</param>
|
||||
/// <param name="Ensured">Tags the historian acknowledged as created or already present.</param>
|
||||
/// <param name="Skipped">Tags whose data type is not historizable on the backend (never sent).</param>
|
||||
/// <param name="Failed">Tags that were sent but the backend did not acknowledge (incl. a swallowed transport error).</param>
|
||||
public sealed record HistorianProvisionResult(
|
||||
int Requested,
|
||||
int Ensured,
|
||||
int Skipped,
|
||||
int Failed);
|
||||
|
||||
/// <summary>
|
||||
/// No-op <see cref="IHistorianProvisioning"/> — the applier's safe default when no historian
|
||||
/// backend is registered. Every call returns an all-zero tally and never touches a backend.
|
||||
/// </summary>
|
||||
public sealed class NullHistorianProvisioning : IHistorianProvisioning
|
||||
{
|
||||
/// <summary>The shared singleton instance.</summary>
|
||||
public static readonly NullHistorianProvisioning Instance = new();
|
||||
|
||||
private NullHistorianProvisioning() { }
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<HistorianProvisionResult> EnsureTagsAsync(
|
||||
IReadOnlyList<HistorianTagProvisionRequest> requests, CancellationToken ct) =>
|
||||
Task.FromResult(new HistorianProvisionResult(0, 0, 0, 0));
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
/// <summary>
|
||||
/// One numeric sample the continuous-historization recorder drains to the historian's SQL
|
||||
/// live-value write path. Carries the minimal payload that path can ingest: an optional UTC
|
||||
/// timestamp, the coerced numeric value, and an OPC-DA quality code.
|
||||
/// </summary>
|
||||
/// <param name="TimestampUtc">
|
||||
/// UTC source timestamp of the sample, or <c>null</c> to defer to the historian's server-stamped
|
||||
/// current-time path (the SQL writer uses <c>SYSDATETIME()</c> when the timestamp is absent).
|
||||
/// </param>
|
||||
/// <param name="Value">The coerced numeric value (the SQL analog write path is numeric-only).</param>
|
||||
/// <param name="Quality">OPC-DA-derived quality code carried to the historian (192 = Good).</param>
|
||||
public readonly record struct HistorizationValue(DateTime? TimestampUtc, double Value, ushort Quality);
|
||||
|
||||
/// <summary>
|
||||
/// Seam over the historian's live-value write path used by the continuous-historization recorder.
|
||||
/// Lives in the abstraction layer so the Runtime recorder depends on it without taking a hard
|
||||
/// reference on the gRPC gateway driver; the gateway driver supplies the concrete adapter
|
||||
/// (<c>GatewayHistorianValueWriter</c>).
|
||||
/// </summary>
|
||||
public interface IHistorianValueWriter
|
||||
{
|
||||
/// <summary>
|
||||
/// Writes a batch of live values for a single tag through the historian's SQL live-write path.
|
||||
/// Implementations are expected to be non-throwing: a transport/gateway error is surfaced as a
|
||||
/// <c>false</c> result so the recorder retains the entries and retries, rather than as an
|
||||
/// exception.
|
||||
/// </summary>
|
||||
/// <param name="tag">Fully-qualified historian tag the values are recorded against.</param>
|
||||
/// <param name="values">The numeric samples to write, in append order.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns><c>true</c> on a successful (or durably-queued) gateway ack; <c>false</c> on a retryable failure.</returns>
|
||||
Task<bool> WriteLiveValuesAsync(string tag, IReadOnlyList<HistorizationValue> values, CancellationToken ct);
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
/// <summary>
|
||||
/// Durable, crash-safe FIFO buffer the continuous-historization recorder appends sampled values
|
||||
/// to <em>before</em> acking the writer, so nothing is lost if the process dies mid-drain. An
|
||||
/// implementation guarantees: appended entries survive an unclean restart up to its commit
|
||||
/// cadence; <see cref="PeekBatchAsync"/> returns entries in append (FIFO) order; and
|
||||
/// <see cref="RemoveAsync"/> durably reclaims an acked entry. A capacity-bounded implementation
|
||||
/// drops the oldest entry on overflow and reflects it in <see cref="DroppedCount"/>.
|
||||
/// </summary>
|
||||
public interface IHistorizationOutbox : IDisposable
|
||||
{
|
||||
/// <summary>Lifetime count of entries dropped because an append would have exceeded capacity.</summary>
|
||||
long DroppedCount { get; }
|
||||
|
||||
/// <summary>Appends <paramref name="entry"/> to the tail of the durable buffer.</summary>
|
||||
/// <param name="entry">The value record to buffer.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
ValueTask AppendAsync(HistorizationOutboxEntry entry, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Returns up to <paramref name="max"/> oldest un-acked entries in FIFO order without removing
|
||||
/// them. Removal happens via <see cref="RemoveAsync"/> once each entry is durably written.
|
||||
/// </summary>
|
||||
/// <param name="max">Maximum number of entries to return; must be positive.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
ValueTask<IReadOnlyList<HistorizationOutboxEntry>> PeekBatchAsync(int max, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Durably removes the entry identified by <paramref name="id"/> (and any older entries ahead
|
||||
/// of it in FIFO order), advancing the buffer head. A no-op when the id is unknown.
|
||||
/// </summary>
|
||||
/// <param name="id">The <see cref="HistorizationOutboxEntry.Id"/> to ack.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
ValueTask RemoveAsync(Guid id, CancellationToken ct);
|
||||
|
||||
/// <summary>Current number of un-acked entries held in the buffer.</summary>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
ValueTask<int> CountAsync(CancellationToken ct);
|
||||
}
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
/// <summary>
|
||||
/// A single historized tag the recorder tracks, carrying BOTH identifiers it needs — kept distinct
|
||||
/// because a <c>HistorianTagname</c> override makes them diverge:
|
||||
/// <list type="bullet">
|
||||
/// <item>
|
||||
/// <see cref="MuxRef"/> — the driver-published reference the per-node dependency mux fans its
|
||||
/// <c>DependencyValueChanged</c> by (the tag's driver-side <c>FullName</c>). The recorder
|
||||
/// registers mux interest and matches incoming values by THIS.
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// <see cref="HistorianName"/> — the resolved historian tag name the value is written under (a
|
||||
/// non-alarm historized value variable's <c>HistorianTagname</c> override, else its
|
||||
/// <c>FullName</c>) — the SAME name the EnsureTags provisioning hook ensures.
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// In the common (no-override) case the two are the same string; an override is the only case in which they
|
||||
/// diverge, and conflating them would silently drop that tag's values (interest registered under a
|
||||
/// key the mux never fans).
|
||||
/// </summary>
|
||||
/// <param name="MuxRef">The driver ref the mux fans by (and the key the recorder registers interest under).</param>
|
||||
/// <param name="HistorianName">The resolved historian tag name the value is historized under.</param>
|
||||
public sealed record HistorizedTagRef(string MuxRef, string HistorianName)
|
||||
{
|
||||
/// <summary>The no-override identity: the mux ref and historian name are the same string (the tag has
|
||||
/// no <c>HistorianTagname</c> override, so it historizes under its own driver <c>FullName</c>).</summary>
|
||||
/// <param name="reference">The driver ref that serves as both the mux key and the historian name.</param>
|
||||
/// <returns>A ref whose <see cref="MuxRef"/> and <see cref="HistorianName"/> are equal.</returns>
|
||||
public static HistorizedTagRef ForSelf(string reference) => new(reference, reference);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Server-side feed that keeps the continuous-historization recorder's set of historized tag refs
|
||||
/// in step with the deployed address space. The <c>AddressSpaceApplier</c> (in the
|
||||
/// OpcUaServer layer) calls this on every deploy with the add/remove DELTA of historized refs the
|
||||
/// plan changes — the applier only ever sees a diff (an incremental/surgical apply carries a delta,
|
||||
/// not the full set), so the recorder behind this seam keeps the full set and converges it.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The feed is <b>non-blocking</b> and best-effort: the production adapter is a single
|
||||
/// fire-and-forget actor <c>Tell</c>, so it never blocks the OPC UA publish thread the applier runs
|
||||
/// on, and the applier wraps the call so a faulting feed can never break a deploy. The applier
|
||||
/// references this abstraction (not the Runtime recorder) so the OpcUaServer layer keeps no
|
||||
/// dependency on Akka / the actor system — exactly mirroring how <see cref="IHistorianProvisioning"/>
|
||||
/// decouples the EnsureTags provisioning hook.
|
||||
/// </remarks>
|
||||
public interface IHistorizedTagSubscriptionSink
|
||||
{
|
||||
/// <summary>
|
||||
/// Converge the recorder's historized-ref interest by an add/remove delta. Each ref carries both
|
||||
/// its <see cref="HistorizedTagRef.MuxRef"/> (the driver ref the mux fans by) and its
|
||||
/// <see cref="HistorizedTagRef.HistorianName"/> (the resolved override-or-FullName the value is
|
||||
/// historized under) — the same name the EnsureTags provisioning hook ensures. The recorder
|
||||
/// applies the delta to its tracked full set and re-registers mux interest (keyed by
|
||||
/// <see cref="HistorizedTagRef.MuxRef"/>) only when the registered key-set actually changes.
|
||||
/// </summary>
|
||||
/// <param name="added">Historized refs newly historized by this deploy (added/changed-into tags).</param>
|
||||
/// <param name="removed">Historized refs no longer historized by this deploy (removed/changed-out tags).</param>
|
||||
void UpdateHistorizedRefs(IReadOnlyList<HistorizedTagRef> added, IReadOnlyList<HistorizedTagRef> removed);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// No-op <see cref="IHistorizedTagSubscriptionSink"/> — the applier's safe default when continuous
|
||||
/// historization is disabled or unwired (no recorder to feed). Every call is a no-op and never
|
||||
/// touches an actor system.
|
||||
/// </summary>
|
||||
public sealed class NullHistorizedTagSubscriptionSink : IHistorizedTagSubscriptionSink
|
||||
{
|
||||
/// <summary>The shared singleton instance.</summary>
|
||||
public static readonly NullHistorizedTagSubscriptionSink Instance = new();
|
||||
|
||||
private NullHistorizedTagSubscriptionSink() { }
|
||||
|
||||
/// <inheritdoc />
|
||||
public void UpdateHistorizedRefs(IReadOnlyList<HistorizedTagRef> added, IReadOnlyList<HistorizedTagRef> removed)
|
||||
{
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,17 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
/// <summary>How aggressively the host re-runs post-connect discovery for this driver.</summary>
|
||||
public enum DiscoveryRediscoverPolicy
|
||||
{
|
||||
/// <summary>Retry every interval up to the cap or until the captured set is non-empty and stable
|
||||
/// (for drivers whose discovered shape fills in asynchronously after connect, e.g. the FOCAS FixedTree).</summary>
|
||||
UntilStable,
|
||||
/// <summary>Run exactly one discovery pass on connect (drivers that discover synchronously in DiscoverAsync).</summary>
|
||||
Once,
|
||||
/// <summary>Never run post-connect discovery.</summary>
|
||||
Never,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Driver capability for discovering tags and hierarchy from the backend.
|
||||
/// Streams discovered nodes into <see cref="IAddressSpaceBuilder"/> rather than
|
||||
@@ -14,4 +26,7 @@ public interface ITagDiscovery
|
||||
/// <param name="builder">The address space builder to stream discovered nodes into.</param>
|
||||
/// <param name="cancellationToken">A cancellation token for the discovery operation.</param>
|
||||
Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Post-connect re-discovery policy. Default preserves the original retry-until-stable behavior.</summary>
|
||||
DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
|
||||
/// <summary>
|
||||
/// The historian sink contract — where qualifying alarm events land. Phase 7 plan
|
||||
/// decision #17: ingestion routes through the Wonderware historian sidecar
|
||||
/// (<c>WonderwareHistorianClient</c>), which owns the <c>aahClientManaged</c> DLLs
|
||||
/// and 32-bit constraints. Tests use an in-memory fake; production uses
|
||||
/// The historian sink contract — where qualifying alarm events land. Ingestion routes
|
||||
/// through the HistorianGateway alarm writer (the gateway's <c>SendEvent</c> gRPC path)
|
||||
/// behind the durable store-and-forward queue. Tests use an in-memory fake; production uses
|
||||
/// <see cref="SqliteStoreAndForwardSink"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
@@ -80,7 +79,7 @@ public enum HistorianDrainState
|
||||
BackingOff,
|
||||
}
|
||||
|
||||
/// <summary>Returned by the Wonderware historian sidecar per event — drain worker uses this to decide retry cadence.</summary>
|
||||
/// <summary>Returned by the historian alarm writer per event — drain worker uses this to decide retry cadence.</summary>
|
||||
public enum HistorianWriteOutcome
|
||||
{
|
||||
/// <summary>Successfully persisted to the historian. Remove from queue.</summary>
|
||||
@@ -91,7 +90,7 @@ public enum HistorianWriteOutcome
|
||||
PermanentFail,
|
||||
}
|
||||
|
||||
/// <summary>What the drain worker delegates writes to — production is <c>WonderwareHistorianClient</c> (the Wonderware historian sidecar).</summary>
|
||||
/// <summary>What the drain worker delegates writes to — production is the HistorianGateway alarm writer (the gateway's <c>SendEvent</c> gRPC path).</summary>
|
||||
public interface IAlarmHistorianWriter
|
||||
{
|
||||
/// <summary>Push a batch of events to the historian. Returns one outcome per event, same order.</summary>
|
||||
|
||||
@@ -732,12 +732,13 @@ public sealed class SqliteStoreAndForwardSink : IAlarmHistorianSink, IDisposable
|
||||
/// <summary>Gets the current exponential backoff delay for retry operations.</summary>
|
||||
public TimeSpan CurrentBackoff => BackoffLadder[_backoffIndex];
|
||||
|
||||
/// <summary>Disposes the sink and releases all held resources including the drain timer.</summary>
|
||||
/// <summary>Disposes the sink and releases all held resources including the drain timer and the writer.</summary>
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
_drainTimer?.Dispose();
|
||||
_drainGate.Dispose();
|
||||
if (_writer is IDisposable writerDisposable) writerDisposable.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -998,6 +998,14 @@ public sealed class AbCipDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
|
||||
// ---- ITagDiscovery ----
|
||||
|
||||
/// <summary>
|
||||
/// Run-once: <see cref="DiscoverAsync"/> emits pre-declared tags and (when
|
||||
/// EnableControllerBrowse is set) fully awaits the @tags symbol-table walk + UDT-shape
|
||||
/// resolution within the single call, streaming the complete node set in one pass —
|
||||
/// nothing fills in asynchronously after connect, so a single discovery pass is sufficient.
|
||||
/// </summary>
|
||||
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;
|
||||
|
||||
/// <summary>
|
||||
/// Stream the driver's tag set into the builder. Pre-declared tags from
|
||||
/// <see cref="AbCipDriverOptions.Tags"/> emit first; optionally, the
|
||||
|
||||
@@ -422,6 +422,13 @@ public sealed class AbLegacyDriver : IDriver, IReadable, IWritable, ITagDiscover
|
||||
|
||||
// ---- ITagDiscovery ----
|
||||
|
||||
/// <summary>
|
||||
/// Run-once: <see cref="DiscoverAsync"/> emits the complete node set synchronously from
|
||||
/// the configured device/tag tables within a single pass — there is no shape that fills
|
||||
/// in asynchronously after connect, so a single discovery pass is sufficient.
|
||||
/// </summary>
|
||||
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;
|
||||
|
||||
/// <summary>
|
||||
/// Discovers tags and populates the address space asynchronously.
|
||||
/// </summary>
|
||||
|
||||
@@ -306,7 +306,16 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead,
|
||||
$"FOCAS status 0x{status:X8} reading {reference}"));
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// Per-call timeout (not external cancellation) — the read stalled past the device
|
||||
// Timeout budget. Surface a recoverable comm error so the BadWaitingForInitialData
|
||||
// seed is overwritten and health degrades, instead of the read hanging forever.
|
||||
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
|
||||
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS read timed out for {reference}"));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
results[i] = new DataValueSnapshot(null, FocasStatusMapper.BadCommunicationError, null, now);
|
||||
@@ -356,7 +365,15 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
var status = await client.WriteAsync(parsed, def.DataType, w.Value, cancellationToken).ConfigureAwait(false);
|
||||
results[i] = new WriteResult(status);
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { throw; }
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// Per-call timeout (not external cancellation) — the write stalled past the device
|
||||
// Timeout budget. Surface a recoverable comm error rather than aborting the batch.
|
||||
results[i] = new WriteResult(FocasStatusMapper.BadCommunicationError);
|
||||
Volatile.Write(ref _health, new DriverHealth(DriverState.Degraded,
|
||||
Volatile.Read(ref _health).LastSuccessfulRead, $"FOCAS write timed out for {w.FullReference}"));
|
||||
}
|
||||
catch (NotSupportedException nse)
|
||||
{
|
||||
results[i] = new WriteResult(FocasStatusMapper.BadNotSupported);
|
||||
@@ -384,6 +401,14 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
|
||||
// ---- ITagDiscovery ----
|
||||
|
||||
/// <summary>
|
||||
/// Retry-until-stable: the FixedTree subtree is filled in asynchronously by
|
||||
/// <see cref="FixedTreeLoopAsync"/> a couple of seconds AFTER connect, so the first
|
||||
/// post-connect <see cref="DiscoverAsync"/> pass would miss it — the host must re-run
|
||||
/// discovery until the captured node set is non-empty and stable.
|
||||
/// </summary>
|
||||
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.UntilStable;
|
||||
|
||||
/// <summary>Discovers tags and builds the OPC UA address space asynchronously.</summary>
|
||||
/// <param name="builder">The address space builder for constructing the OPC UA namespace.</param>
|
||||
/// <param name="cancellationToken">Cancellation token for the operation.</param>
|
||||
@@ -1113,7 +1138,11 @@ public sealed class FocasDriver : IDriver, IReadable, IWritable, ITagDiscovery,
|
||||
device.Client = null;
|
||||
}
|
||||
|
||||
device.Client = _clientFactory.Create();
|
||||
// Wrap the raw wire client so every operation on the device's single FOCAS/2 socket is
|
||||
// serialized (request→response on one socket cannot interleave) and time-bounded. Without
|
||||
// this, the equipment poll, fixed-tree loop, probe, and recycle loop collide on the shared
|
||||
// socket and a stalled read blocks forever — leaving bound tags at BadWaitingForInitialData.
|
||||
device.Client = new SynchronizedFocasClient(_clientFactory.Create(), _options.Timeout);
|
||||
try
|
||||
{
|
||||
await device.Client.ConnectAsync(device.ParsedAddress, _options.Timeout, ct).ConfigureAwait(false);
|
||||
|
||||
@@ -195,12 +195,41 @@ public static class FocasDriverFactoryExtensions
|
||||
AllowTrailingCommas = true,
|
||||
};
|
||||
|
||||
/// <summary>
/// String converter that also accepts a bare JSON <b>number</b> token. The AdminUI persists
/// the FOCAS <c>Series</c> enum as its integer value (e.g. <c>"series":6</c>), whereas this DTO
/// models <c>Series</c> as a string that is later handed to <see cref="ParseSeries"/>
/// (Enum.TryParse accepts the numeric form). A plain string property would make
/// System.Text.Json throw "Cannot get the value of a token type 'Number' as a string" on the
/// bare number, and the driver would fall back to a stub. Input may be string / number / null;
/// output is always a string (or <c>null</c>).
/// </summary>
internal sealed class FlexibleStringConverter : JsonConverter<string?>
{
    /// <summary>Reads a string, number, or null token and returns its string form.</summary>
    /// <param name="reader">Reader positioned on the token to convert.</param>
    /// <param name="typeToConvert">The target type (unused).</param>
    /// <param name="options">Serializer options (unused).</param>
    /// <returns>The string value, the invariant-culture text of a number, or <c>null</c>.</returns>
    /// <exception cref="JsonException">The token is not a string, number, or null.</exception>
    public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.String:
                return reader.GetString();

            case JsonTokenType.Null:
                return null;

            case JsonTokenType.Number:
            {
                // Prefer the integral form so an enum ordinal such as 6 round-trips as "6";
                // only non-integral numbers take the double path.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                if (reader.TryGetInt64(out var whole))
                {
                    return whole.ToString(invariant);
                }

                return reader.GetDouble().ToString(invariant);
            }

            default:
                throw new JsonException($"Expected string, number, or null but got {reader.TokenType}.");
        }
    }

    /// <summary>Writes the value back as a JSON string, or a JSON null when it is <c>null</c>.</summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">The string to emit; may be <c>null</c>.</param>
    /// <param name="options">Serializer options (unused).</param>
    public override void Write(Utf8JsonWriter writer, string? value, JsonSerializerOptions options)
    {
        if (value is not null)
        {
            writer.WriteStringValue(value);
            return;
        }

        writer.WriteNullValue();
    }
}
|
||||
|
||||
internal sealed class FocasDriverConfigDto
|
||||
{
|
||||
/// <summary>Gets or sets the FOCAS client factory backend name (e.g. "wire" or "stub").</summary>
|
||||
public string? Backend { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the CNC series for this driver.</summary>
|
||||
[JsonConverter(typeof(FlexibleStringConverter))]
|
||||
public string? Series { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the operation timeout in milliseconds.</summary>
|
||||
@@ -234,6 +263,7 @@ public static class FocasDriverFactoryExtensions
|
||||
public string? DeviceName { get; init; }
|
||||
|
||||
/// <summary>Gets or sets the CNC series for this device (overrides top-level series if provided).</summary>
|
||||
[JsonConverter(typeof(FlexibleStringConverter))]
|
||||
public string? Series { get; init; }
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -21,9 +21,19 @@ public sealed record FocasHostAddress(string Host, int Port)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value)) return null;
|
||||
const string prefix = "focas://";
|
||||
if (!value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) return null;
|
||||
|
||||
var body = value[prefix.Length..];
|
||||
// Canonical form is focas://{ip}[:{port}], but the AdminUI persists the device host as a
|
||||
// scheme-less "{ip}[:{port}]" (e.g. "10.201.31.5:8193"). Accept that too: take the body
|
||||
// after focas:// when present, else the whole value when it carries NO other URI scheme
|
||||
// (a "://" that isn't ours — e.g. http:// — is still rejected). The host-contains-colon
|
||||
// guard below then rejects malformed scheme typos like "focas:10.0.0.5:8193".
|
||||
string body;
|
||||
if (value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
|
||||
body = value[prefix.Length..];
|
||||
else if (!value.Contains("://", StringComparison.Ordinal))
|
||||
body = value;
|
||||
else
|
||||
return null;
|
||||
if (string.IsNullOrEmpty(body)) return null;
|
||||
|
||||
var colonIdx = body.LastIndexOf(':');
|
||||
@@ -39,7 +49,9 @@ public sealed record FocasHostAddress(string Host, int Port)
|
||||
{
|
||||
host = body;
|
||||
}
|
||||
if (string.IsNullOrEmpty(host)) return null;
|
||||
// Empty host, or a host still carrying a colon (e.g. the malformed "focas:10.0.0.5" left
|
||||
// when someone wrote "focas:10.0.0.5:8193" without the //), is invalid.
|
||||
if (string.IsNullOrEmpty(host) || host.Contains(':', StringComparison.Ordinal)) return null;
|
||||
return new FocasHostAddress(host, port);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
|
||||
|
||||
/// <summary>
/// Decorates an <see cref="IFocasClient"/> so that every wire operation on the device's
/// single FOCAS/2 socket is (1) <b>serialized</b> against all other operations and
/// (2) <b>time-bounded</b>.
/// </summary>
/// <remarks>
/// <para>FOCAS/2 over TCP:8193 is a strict request→response protocol on ONE socket. The
/// driver holds a single <see cref="IFocasClient"/> per device, but several independent loops
/// read from it concurrently — the equipment poll (<see cref="FocasDriver.ReadAsync"/>), the
/// fixed-tree loop (<c>FixedTreeLoopAsync</c>), the connectivity probe, and the recycle loop.
/// Without serialization, two reads interleave their <c>send(request); read(response)</c> on the
/// same socket: one reader consumes the other's response PDU and the victim then blocks forever
/// waiting for bytes that never arrive — leaving the bound OPC UA node stuck at
/// <c>BadWaitingForInitialData</c>. This was the root cause of FOCAS equipment tags never
/// surfacing a value while the probe reported HEALTHY (the probe reads work single-threaded on a
/// dev box, but collide deployed once the fixed-tree loop runs concurrently).</para>
///
/// <para>The gate (<see cref="SemaphoreSlim"/> of count 1) makes each request→response atomic on
/// the socket. The per-call timeout cancels the token handed to the inner operation once the
/// configured <c>Timeout</c> budget elapses, so a stalled response surfaces as a recoverable
/// error instead of permanent silence. The gate and timeout are paired deliberately: a lock
/// around an <i>unbounded</i> read would deadlock all I/O for the device.</para>
///
/// <para><see cref="ConnectAsync"/> and <see cref="ProbeAsync"/> are serialized but NOT bounded by
/// this decorator's call timeout — they carry their own budgets (the connect timeout argument and
/// the probe's caller-supplied linked token respectively), and double-bounding would shrink them.</para>
///
/// <para><see cref="Dispose"/> is idempotent and safe to call while operations are in flight:
/// the gate itself is never disposed, so an in-flight operation can always release it, and
/// callers arriving (or waking from the gate) after disposal fail with
/// <see cref="ObjectDisposedException"/>.</para>
/// </remarks>
public sealed class SynchronizedFocasClient : IFocasClient
{
    private readonly IFocasClient _inner;
    private readonly TimeSpan _callTimeout;

    // Intentionally never disposed. Disposing it while an operation is in flight makes that
    // operation's `finally { _gate.Release(); }` throw ObjectDisposedException, which masks the
    // real failure and leaves queued waiters stranded. SemaphoreSlim only owns a native handle
    // once AvailableWaitHandle is touched, which this class never does.
    private readonly SemaphoreSlim _gate = new(1, 1);

    // 0 = live, 1 = disposed. Written with Interlocked so Dispose runs its body exactly once.
    private int _disposed;

    /// <summary>Wraps <paramref name="inner"/> with per-device serialization + a per-call timeout.</summary>
    /// <param name="inner">The underlying FOCAS client to serialize access to.</param>
    /// <param name="callTimeout">
    /// The budget applied to each data read/write. <see cref="TimeSpan.Zero"/> or negative disables
    /// the per-call timeout (callers' own cancellation tokens still apply).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is <c>null</c>.</exception>
    public SynchronizedFocasClient(IFocasClient inner, TimeSpan callTimeout)
    {
        _inner = inner ?? throw new ArgumentNullException(nameof(inner));
        _callTimeout = callTimeout;
    }

    /// <inheritdoc />
    public bool IsConnected => _inner.IsConnected;

    /// <inheritdoc />
    public Task ConnectAsync(FocasHostAddress address, TimeSpan timeout, CancellationToken cancellationToken) =>
        RunGatedAsync(ct => _inner.ConnectAsync(address, timeout, ct), cancellationToken);

    /// <inheritdoc />
    public Task<bool> ProbeAsync(CancellationToken cancellationToken) =>
        RunGatedAsync(ct => _inner.ProbeAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<(object? value, uint status)> ReadAsync(
        FocasAddress address, FocasDataType type, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadAsync(address, type, ct), cancellationToken);

    /// <inheritdoc />
    public Task<uint> WriteAsync(
        FocasAddress address, FocasDataType type, object? value, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.WriteAsync(address, type, value, ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasActiveAlarm>> ReadAlarmsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadAlarmsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasSysInfo> GetSysInfoAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSysInfoAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasAxisName>> GetAxisNamesAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetAxisNamesAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasSpindleName>> GetSpindleNamesAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleNamesAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasDynamicSnapshot> ReadDynamicAsync(int axisIndex, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.ReadDynamicAsync(axisIndex, ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasProgramInfo> GetProgramInfoAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetProgramInfoAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<FocasTimer> GetTimerAsync(FocasTimerKind kind, CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetTimerAsync(kind, ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<FocasServoLoad>> GetServoLoadsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetServoLoadsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleLoadsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleLoadsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetSpindleMaxRpmsAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetSpindleMaxRpmsAsync(ct), cancellationToken);

    /// <inheritdoc />
    public Task<IReadOnlyList<int>> GetPositionFiguresAsync(CancellationToken cancellationToken) =>
        RunBoundedAsync(ct => _inner.GetPositionFiguresAsync(ct), cancellationToken);

    /// <summary>
    /// Disposes the wrapped client. Idempotent; only the first call reaches the inner client.
    /// The gate is left alive on purpose so operations still in flight can release it.
    /// </summary>
    public void Dispose()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        _inner.Dispose();
    }

    // Throws once Dispose has run, matching the ObjectDisposedException callers previously got
    // from waiting on the disposed gate.
    private void ThrowIfDisposed()
    {
        if (Volatile.Read(ref _disposed) != 0)
        {
            throw new ObjectDisposedException(nameof(SynchronizedFocasClient));
        }
    }

    // Acquires the gate. Disposal is checked before waiting and again after waking, so a caller
    // that was queued while Dispose ran never drives the already-disposed inner client.
    private async Task EnterGateAsync(CancellationToken ct)
    {
        ThrowIfDisposed();
        await _gate.WaitAsync(ct).ConfigureAwait(false);

        if (Volatile.Read(ref _disposed) != 0)
        {
            _gate.Release();
            ThrowIfDisposed();
        }
    }

    // Gate only — the caller already governs the budget (connect timeout arg / probe linked token).
    private async Task<T> RunGatedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        await EnterGateAsync(ct).ConfigureAwait(false);
        try { return await op(ct).ConfigureAwait(false); }
        finally { _gate.Release(); }
    }

    // Gate only, for operations with no result.
    private async Task RunGatedAsync(Func<CancellationToken, Task> op, CancellationToken ct)
    {
        await EnterGateAsync(ct).ConfigureAwait(false);
        try { await op(ct).ConfigureAwait(false); }
        finally { _gate.Release(); }
    }

    // Gate + per-call timeout. A fired timeout surfaces as OperationCanceledException whose token is
    // the linked (not the caller's) token — callers distinguish it from real cancellation by testing
    // their own token's IsCancellationRequested. The timeout clock starts only once the gate is held,
    // so time spent queued behind another operation does not eat into this call's budget.
    private async Task<T> RunBoundedAsync<T>(Func<CancellationToken, Task<T>> op, CancellationToken ct)
    {
        await EnterGateAsync(ct).ConfigureAwait(false);
        try
        {
            if (_callTimeout <= TimeSpan.Zero)
            {
                return await op(ct).ConfigureAwait(false);
            }

            using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);
            linked.CancelAfter(_callTimeout);
            return await op(linked.Token).ConfigureAwait(false);
        }
        finally { _gate.Release(); }
    }
}
|
||||
@@ -584,6 +584,15 @@ public sealed class GalaxyDriver
|
||||
|
||||
// ===== ITagDiscovery (PR 4.1) =====
|
||||
|
||||
/// <summary>
|
||||
/// Run-once: <see cref="DiscoverAsync"/> fetches the full Galaxy hierarchy inline and
|
||||
/// streams the complete node set within a single awaited call — there is no FOCAS-style
|
||||
/// background cache that fills in after connect. Galaxy is a heavy network driver, so the
|
||||
/// bounded post-connect retry loop is deliberately avoided; re-discovery on Galaxy
|
||||
/// redeploy is handled separately via <see cref="IRediscoverable"/> + the deploy-event watcher.
|
||||
/// </summary>
|
||||
public DiscoveryRediscoverPolicy RediscoverPolicy => DiscoveryRediscoverPolicy.Once;
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
|
||||
{
|
||||
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
using Grpc.Core;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.HistorianGateway.Client;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
/// <see cref="IAlarmHistorianWriter"/> backed by the HistorianGateway <c>SendEvent</c> path. The
/// drain worker behind <c>SqliteStoreAndForwardSink</c> calls
/// <see cref="WriteBatchAsync"/> and uses the returned per-event
/// <see cref="HistorianWriteOutcome"/> to decide retry vs. dead-letter, so this writer maps every
/// gateway result — success ack, the published client's typed exception hierarchy, raw
/// <see cref="RpcException"/> (defensive), and any unexpected error — onto exactly one outcome per
/// event and <b>never throws</b>.
/// </summary>
/// <remarks>
/// <para>
/// Each event is sent individually so one poison event cannot fail the whole batch: a permanent
/// failure on event N is dead-lettered while its siblings continue. Outcomes are returned in
/// input order, one per event; an empty batch yields an empty list with no gateway call.
/// </para>
/// <para>
/// <b>Outcome mapping.</b> Success (or store-forward-queued) ack ⇒ <see cref="HistorianWriteOutcome.Ack"/>.
/// Transient gRPC codes (<c>Unavailable</c>, <c>DeadlineExceeded</c>, <c>ResourceExhausted</c>,
/// <c>Aborted</c>, <c>Internal</c>, <c>Cancelled</c>) and the auth codes (<c>Unauthenticated</c>,
/// <c>PermissionDenied</c>) ⇒ <see cref="HistorianWriteOutcome.RetryPlease"/> — an auth fix
/// re-enables the batch, so an auth blip never dead-letters. Permanent codes
/// (<c>InvalidArgument</c>, <c>FailedPrecondition</c>, <c>OutOfRange</c>, <c>Unimplemented</c>) ⇒
/// <see cref="HistorianWriteOutcome.PermanentFail"/> (dead-letter poison — mirrors the Wonderware
/// <c>PerEventStatus==2</c> boundary). The typed client exceptions are classified by type, or by
/// the <see cref="RpcException"/> they wrap; any other or unclassifiable error defaults to
/// <see cref="HistorianWriteOutcome.PermanentFail"/> so the drain worker cannot loop a poison
/// event forever.
/// </para>
/// <para>
/// <b>Cancellation and timeouts never dead-letter.</b> A cancelled or timed-out send says nothing
/// about the event itself, so every shape it can take — <see cref="OperationCanceledException"/>
/// (whether or not the caller's token fired), <see cref="TimeoutException"/>, or an
/// <see cref="RpcException"/> carrying <c>StatusCode.Cancelled</c> — maps to
/// <see cref="HistorianWriteOutcome.RetryPlease"/>.
/// </para>
/// </remarks>
public sealed class GatewayAlarmHistorianWriter : IAlarmHistorianWriter, IDisposable
{
    private readonly IHistorianGatewayClient _client;
    private readonly ILogger<GatewayAlarmHistorianWriter> _logger;

    /// <summary>Creates the writer over a gateway client seam.</summary>
    /// <param name="client">The gateway client used for the <c>SendEvent</c> write path.</param>
    /// <param name="logger">Logger for per-event outcome diagnostics (never logs event content).</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public GatewayAlarmHistorianWriter(IHistorianGatewayClient client, ILogger<GatewayAlarmHistorianWriter> logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<HistorianWriteOutcome>> WriteBatchAsync(
        IReadOnlyList<AlarmHistorianEvent> batch, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(batch);

        if (batch.Count == 0)
        {
            return Array.Empty<HistorianWriteOutcome>();
        }

        var outcomes = new HistorianWriteOutcome[batch.Count];

        for (var i = 0; i < batch.Count; i++)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                // Shutdown mid-drain: short-circuit the remaining events to RetryPlease rather than
                // calling the gateway with a cancelled token. They stay queued for retry next startup
                // — a cancellation must NEVER dead-letter an in-flight event (silent data loss).
                outcomes[i] = HistorianWriteOutcome.RetryPlease;
                continue;
            }

            outcomes[i] = await SendOneAsync(batch[i], cancellationToken).ConfigureAwait(false);
        }

        return outcomes;
    }

    // Sends a single event and converts whatever happens into exactly one outcome; never throws.
    private async Task<HistorianWriteOutcome> SendOneAsync(AlarmHistorianEvent evt, CancellationToken cancellationToken)
    {
        try
        {
            var ack = await _client.SendEventAsync(AlarmEventMapper.ToHistorianEvent(evt), cancellationToken)
                .ConfigureAwait(false);
            return MapAck(ack);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation mid-send at shutdown is NOT a poison event. Map to RetryPlease so the
            // event stays queued for next startup rather than being dead-lettered (data loss).
            _logger.LogDebug("Alarm SendEvent cancelled at shutdown; will retry.");
            return HistorianWriteOutcome.RetryPlease;
        }
        catch (Exception exception)
        {
            // NEVER throw out of the writer — the drain worker expects a per-event outcome. Classify
            // and log only the failure category (no event content, hostnames, or credentials).
            var outcome = Classify(exception);
            if (outcome == HistorianWriteOutcome.PermanentFail)
            {
                _logger.LogWarning(
                    "Alarm SendEvent permanently failed ({Exception}); dead-lettering the event.",
                    exception.GetType().Name);
            }
            else
            {
                _logger.LogDebug(
                    "Alarm SendEvent transiently failed ({Exception}); will retry.",
                    exception.GetType().Name);
            }

            return outcome;
        }
    }

    // A non-success ack that the gateway durably queued (store-forward) is still accepted — do not
    // re-drain it. A non-success, non-queued ack is a soft failure: retry rather than dead-letter.
    private static HistorianWriteOutcome MapAck(WriteAck ack) =>
        ack.Success || ack.Queued ? HistorianWriteOutcome.Ack : HistorianWriteOutcome.RetryPlease;

    // Maps an exception from the send path onto retry vs. dead-letter.
    private static HistorianWriteOutcome Classify(Exception exception) => exception switch
    {
        // Published client's typed hierarchy (production reality). Unavailable + both auth kinds retry.
        HistorianGatewayUnavailableException => HistorianWriteOutcome.RetryPlease,
        HistorianGatewayAuthenticationException => HistorianWriteOutcome.RetryPlease,
        HistorianGatewayAuthorizationException => HistorianWriteOutcome.RetryPlease,
        // A base client exception wrapping a permanent/transient RpcException → classify by inner status.
        HistorianGatewayException { InnerException: RpcException inner } => ClassifyStatus(inner.StatusCode),
        // Defensive raw RpcException path (the seam type signature permits it).
        RpcException rpc => ClassifyStatus(rpc.StatusCode),
        // A cancellation that was NOT driven by the caller's token (e.g. an internal per-call
        // timeout) or a plain timeout says nothing about the event — keep it queued.
        OperationCanceledException or TimeoutException => HistorianWriteOutcome.RetryPlease,
        // Anything else (incl. a bare base client exception we cannot classify) → dead-letter to avoid
        // an infinite drain loop on a poison event.
        _ => HistorianWriteOutcome.PermanentFail,
    };

    // Maps a gRPC status code onto retry vs. dead-letter.
    private static HistorianWriteOutcome ClassifyStatus(StatusCode code) => code switch
    {
        StatusCode.Unavailable
            or StatusCode.DeadlineExceeded
            or StatusCode.ResourceExhausted
            or StatusCode.Aborted
            or StatusCode.Internal
            // gRPC reports a cancelled call (shutdown, dropped channel) as RpcException(Cancelled)
            // rather than OperationCanceledException — a cancellation must never dead-letter.
            or StatusCode.Cancelled
            // An auth fix re-enables the whole batch — never dead-letter on an auth blip.
            or StatusCode.Unauthenticated
            or StatusCode.PermissionDenied => HistorianWriteOutcome.RetryPlease,
        StatusCode.InvalidArgument
            or StatusCode.FailedPrecondition
            or StatusCode.OutOfRange
            or StatusCode.Unimplemented => HistorianWriteOutcome.PermanentFail,
        // Unknown/unclassified gRPC code → dead-letter to avoid an infinite drain loop.
        _ => HistorianWriteOutcome.PermanentFail,
    };

    /// <summary>
    /// Disposes the underlying gateway client and its gRPC channel. The concrete
    /// <see cref="HistorianGatewayClientAdapter"/> implements <see cref="IDisposable"/>; test doubles
    /// that only implement <see cref="IAsyncDisposable"/> are safely no-opped by the cast guard.
    /// </summary>
    public void Dispose() => (_client as IDisposable)?.Dispose();
}
|
||||
@@ -0,0 +1,345 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
|
||||
/// Server-side <see cref="IHistorianDataSource"/> backed by the HistorianGateway gRPC surface
|
||||
/// (via the <see cref="IHistorianGatewayClient"/> seam). Translates OPC UA HistoryRead requests
|
||||
/// to gateway read calls and maps the wire shapes back to the driver-agnostic
|
||||
/// <see cref="DataValueSnapshot"/> / <see cref="HistoricalEvent"/> carriers using the pure
|
||||
/// mappers in <c>Mapping/</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The data source owns no historian connection of its own — it delegates to the gateway, which
|
||||
/// pools and amortizes the underlying historian sessions. A thrown gateway exception is recorded
|
||||
/// as a health failure and rethrown: the node manager turns it into a Bad HistoryRead result, so
|
||||
/// a backend fault never crashes the host. An empty time window is a successful (GoodNoData)
|
||||
/// read, not a fault. Health counters follow the single-<c>_healthLock</c> discipline ported
|
||||
/// from <c>WonderwareHistorianClient</c> so <c>TotalSuccesses + TotalFailures == TotalQueries</c>
|
||||
/// holds at every observed snapshot.
|
||||
/// </remarks>
|
||||
public sealed class GatewayHistorianDataSource : IHistorianDataSource, IAsyncDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// <see cref="ConnectionStatus.ConnectionKind"/> is a combinable [Flags] value: the
|
||||
/// process-data connection is bit 0 (value 1), the event connection is bit 1 (value 2).
|
||||
/// </summary>
|
||||
private const uint ProcessConnectionFlag = 1;
|
||||
private const uint EventConnectionFlag = 2;
|
||||
|
||||
private readonly IHistorianGatewayClient _client;
|
||||
private readonly ILogger<GatewayHistorianDataSource> _logger;
|
||||
|
||||
private readonly object _healthLock = new();
|
||||
private DateTime? _lastSuccessUtc;
|
||||
private DateTime? _lastFailureUtc;
|
||||
private string? _lastError;
|
||||
private long _totalQueries;
|
||||
private long _totalSuccesses;
|
||||
private long _totalFailures;
|
||||
private int _consecutiveFailures;
|
||||
private bool _processConnectionOpen;
|
||||
private bool _eventConnectionOpen;
|
||||
|
||||
/// <summary>Creates a gateway-backed historian data source.</summary>
|
||||
/// <param name="client">The gateway client seam used for all reads.</param>
|
||||
/// <param name="logger">Diagnostic logger; failures are recorded without leaking tag/host detail.</param>
|
||||
public GatewayHistorianDataSource(IHistorianGatewayClient client, ILogger<GatewayHistorianDataSource> logger)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(client);
|
||||
ArgumentNullException.ThrowIfNull(logger);
|
||||
_client = client;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<HistoryReadResult> ReadRawAsync(
|
||||
string fullReference, DateTime startUtc, DateTime endUtc, uint maxValuesPerNode,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
// The gateway seam caps with an int; OPC UA hands us a uint, so clamp to int range.
|
||||
var maxValues = (int)Math.Min(maxValuesPerNode, int.MaxValue);
|
||||
var samples = new List<HistorianSample>();
|
||||
await foreach (var sample in _client
|
||||
.ReadRawAsync(fullReference, startUtc, endUtc, maxValues, cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
samples.Add(sample);
|
||||
}
|
||||
|
||||
var snapshots = SampleMapper.ToSnapshots(samples);
|
||||
RecordOutcome(success: true, error: null);
|
||||
return new HistoryReadResult(snapshots, ContinuationPoint: null);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
RecordReadFailure(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<HistoryReadResult> ReadProcessedAsync(
|
||||
string fullReference, DateTime startUtc, DateTime endUtc, TimeSpan interval,
|
||||
HistoryAggregateType aggregate, CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
// Total/Count are now native gateway retrieval modes — no client-side scaling
|
||||
// (unlike the Wonderware path that derived Total as Average × interval-seconds).
|
||||
var mode = AggregateModeMapper.ToRetrievalMode(aggregate);
|
||||
var buckets = new List<HistorianAggregateSample>();
|
||||
await foreach (var bucket in _client
|
||||
.ReadAggregateAsync(fullReference, startUtc, endUtc, mode, interval, cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
buckets.Add(bucket);
|
||||
}
|
||||
|
||||
var snapshots = SampleMapper.ToAggregateSnapshots(buckets);
|
||||
RecordOutcome(success: true, error: null);
|
||||
return new HistoryReadResult(snapshots, ContinuationPoint: null);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
RecordReadFailure(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<HistoryReadResult> ReadAtTimeAsync(
|
||||
string fullReference, IReadOnlyList<DateTime> timestampsUtc, CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
var samples = await _client
|
||||
.ReadAtTimeAsync(fullReference, timestampsUtc, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
var aligned = AlignAtTimeSnapshots(timestampsUtc, samples);
|
||||
RecordOutcome(success: true, error: null);
|
||||
return new HistoryReadResult(aligned, ContinuationPoint: null);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
RecordReadFailure(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Depends on the target gateway running with <c>RuntimeDb:EventReadsEnabled=true</c> (the
/// SQL alarm-history path). The <paramref name="sourceName"/> is passed through to the
/// gateway, but its SQL <c>ReadEvents</c> source filter may not be present yet — so this
/// adapter also filters the mapped events by <see cref="HistoricalEvent.SourceName"/>
/// client-side (defensive; remove once the server filter is confirmed). The
/// <paramref name="maxEvents"/> cap is enforced client-side by early stream termination:
/// a non-positive value applies no client cap (the gateway may still apply its
/// <c>EventReadMaxRows</c>); a positive cap stops at N and sets a non-null
/// <see cref="HistoricalEventsResult.ContinuationPoint"/> iff at least one further matching
/// event existed (the Core.Abstractions-009 truncation signal).
/// <para>
/// The result list's <i>initial</i> capacity is bounded independently of
/// <paramref name="maxEvents"/>: the cap is caller-supplied, so reserving it verbatim would let
/// a very large value allocate a huge backing array before a single event has been read. The
/// list still grows to hold up to <paramref name="maxEvents"/> events.
/// </para>
/// </remarks>
public async Task<HistoricalEventsResult> ReadEventsAsync(
    string? sourceName, DateTime startUtc, DateTime endUtc, int maxEvents,
    CancellationToken cancellationToken)
{
    // Upper bound on the capacity reserved up front; only a preallocation hint, not a result cap.
    const int MaxPreallocatedEvents = 1024;

    try
    {
        var hasCap = maxEvents > 0;
        var collected = new List<HistoricalEvent>(hasCap ? Math.Min(maxEvents, MaxPreallocatedEvents) : 0);
        var truncated = false;

        await foreach (var wireEvent in _client
            .ReadEventsAsync(sourceName, startUtc, endUtc, maxEvents, cancellationToken)
            .ConfigureAwait(false))
        {
            var mapped = EventMapper.ToHistoricalEvent(wireEvent);

            // Defensive client-side source filter: the gateway's SQL ReadEvents source filter
            // may not be present, so drop any event whose source does not match the request.
            if (sourceName is not null && !string.Equals(mapped.SourceName, sourceName, StringComparison.Ordinal))
            {
                continue;
            }

            // One more matching event arriving once the cap is full means the result is
            // truncated — stop draining and flag it (Core.Abstractions-009).
            if (hasCap && collected.Count == maxEvents)
            {
                truncated = true;
                break;
            }

            collected.Add(mapped);
        }

        RecordOutcome(success: true, error: null);

        // A non-null, opaque token signals truncation to the caller (Core.Abstractions-009).
        // The gateway has no resumable cursor, so the token's contents carry no paging state —
        // its presence alone is the "more events exist" signal. A fresh array per call keeps it
        // from being shared/mutated.
        return new HistoricalEventsResult(collected, truncated ? new byte[] { 0x01 } : null);
    }
    catch (Exception ex)
    {
        RecordReadFailure(ex);
        throw;
    }
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Copies the health counters and the cached connection flags into a new snapshot while
/// holding <c>_healthLock</c>. No I/O is performed here.
/// </remarks>
public HistorianHealthSnapshot GetHealthSnapshot()
{
    HistorianHealthSnapshot snapshot;

    lock (_healthLock)
    {
        // Connection flags are the values last cached by RefreshConnectionStateAsync. The
        // gateway is non-clustered to us, so node fields are null/empty (mirrors the Wonderware
        // client's Finding 010 posture).
        snapshot = new HistorianHealthSnapshot(
            TotalQueries: _totalQueries,
            TotalSuccesses: _totalSuccesses,
            TotalFailures: _totalFailures,
            ConsecutiveFailures: _consecutiveFailures,
            LastSuccessTime: _lastSuccessUtc,
            LastFailureTime: _lastFailureUtc,
            LastError: _lastError,
            ProcessConnectionOpen: _processConnectionOpen,
            EventConnectionOpen: _eventConnectionOpen,
            ActiveProcessNode: null,
            ActiveEventNode: null,
            Nodes: []);
    }

    return snapshot;
}
|
||||
|
||||
/// <summary>
/// Queries the gateway's connection status and caches the resulting process / event
/// connection flags for <see cref="GetHealthSnapshot"/> to report. Each flag is open only when
/// <see cref="ConnectionStatus.ConnectedToServer"/> is set AND the matching bit is present in
/// <see cref="ConnectionStatus.ConnectionKind"/>. This is a health probe: a failed status
/// query is swallowed (logged at debug level) and both flags are cached as closed until a
/// later refresh succeeds.
/// </summary>
/// <param name="cancellationToken">A token to cancel the status query.</param>
/// <returns>A task that completes when the cached flags have been updated.</returns>
public async Task RefreshConnectionStateAsync(CancellationToken cancellationToken)
{
    (bool ProcessOpen, bool EventOpen) observed;

    try
    {
        var status = await _client.GetConnectionStatusAsync(cancellationToken).ConfigureAwait(false);

        // The kind bits are only consulted when the gateway reports it is connected at all.
        observed = status.ConnectedToServer
            ? ((status.ConnectionKind & ProcessConnectionFlag) != 0,
               (status.ConnectionKind & EventConnectionFlag) != 0)
            : (false, false);
    }
    catch (Exception)
    {
        // A health probe must never crash the host; an unreachable gateway degrades both
        // connection flags to closed until the next successful refresh.
        _logger.LogDebug("Historian gateway connection-status refresh failed; treating both connections as closed.");
        observed = (false, false);
    }

    // Publish both flags under one lock acquisition so a snapshot never sees only one updated.
    lock (_healthLock)
    {
        _processConnectionOpen = observed.ProcessOpen;
        _eventConnectionOpen = observed.EventOpen;
    }
}
|
||||
|
||||
/// <summary>
/// Builds the <see cref="IHistorianDataSource.ReadAtTimeAsync"/> result from a gateway reply:
/// exactly one snapshot per requested timestamp, in request order. Returned samples are looked
/// up by timestamp ticks; a requested timestamp with no matching sample yields a Bad-quality
/// (<c>0x80000000</c>) snapshot stamped at the requested time, so values are never positionally
/// misaligned. The alignment logic was ported from the now-retired Wonderware client's at-time
/// snapshot reconciliation.
/// </summary>
/// <param name="timestampsUtc">The timestamps the caller asked for, in request order.</param>
/// <param name="samples">The samples the gateway returned.</param>
/// <returns>One snapshot per entry of <paramref name="timestampsUtc"/>, in the same order.</returns>
private static IReadOnlyList<DataValueSnapshot> AlignAtTimeSnapshots(
    IReadOnlyList<DateTime> timestampsUtc, IReadOnlyList<HistorianSample> samples)
{
    // Tick-keyed lookup of the reply. Samples without a timestamp cannot be matched and are
    // ignored; when several samples share a timestamp the first one wins (TryAdd).
    var lookup = new Dictionary<long, HistorianSample>(samples.Count);
    foreach (var candidate in samples)
    {
        if (candidate.Timestamp is not null)
        {
            lookup.TryAdd(candidate.Timestamp.ToDateTime().Ticks, candidate);
        }
    }

    var aligned = new DataValueSnapshot[timestampsUtc.Count];
    for (var slot = 0; slot < aligned.Length; slot++)
    {
        var requestedUtc = DateTime.SpecifyKind(timestampsUtc[slot], DateTimeKind.Utc);

        aligned[slot] = lookup.TryGetValue(requestedUtc.Ticks, out var match)
            // Hit: reuse the shared sample mapper for value + quality, then re-stamp the source
            // timestamp to the requested time per the ReadAtTime contract.
            ? SampleMapper.ToSnapshot(match) with { SourceTimestampUtc = requestedUtc }
            // Gap: the gateway returned no sample for this timestamp. Per the contract this is a
            // Bad-quality snapshot stamped at the requested time, not a dropped row.
            : new DataValueSnapshot(
                Value: null,
                StatusCode: 0x80000000u, // Bad
                SourceTimestampUtc: requestedUtc,
                ServerTimestampUtc: DateTime.UtcNow);
    }

    return aligned;
}
|
||||
|
||||
/// <summary>
/// Books a failed read into the health counters (with the exception's message as the last
/// error) and writes a fixed debug line that carries no tag, host, or value. Rethrowing the
/// exception is left to the caller.
/// </summary>
/// <param name="ex">The exception raised by the failed read.</param>
private void RecordReadFailure(Exception ex)
{
    var failureMessage = ex.Message;
    RecordOutcome(success: false, error: failureMessage);

    // The log text is deliberately constant; the exception itself is not attached.
    _logger.LogDebug("Historian gateway read operation failed and was recorded as a health failure.");
}
|
||||
|
||||
/// <summary>
/// Books the outcome of one read. <c>_totalQueries</c> and exactly one of
/// <c>_totalSuccesses</c> / <c>_totalFailures</c> are incremented inside a single
/// <c>_healthLock</c> acquisition, so a concurrent <see cref="GetHealthSnapshot"/> never
/// observes a torn state.
/// </summary>
/// <param name="success"><c>true</c> for a successful read; <c>false</c> for a failed one.</param>
/// <param name="error">The error text stored as the last error; only used on failure.</param>
private void RecordOutcome(bool success, string? error)
{
    lock (_healthLock)
    {
        _totalQueries++;

        if (!success)
        {
            _totalFailures++;
            _consecutiveFailures++;
            _lastFailureUtc = DateTime.UtcNow;
            _lastError = error;
            return;
        }

        // Success resets the failure streak; the last error text is left as it was.
        _totalSuccesses++;
        _consecutiveFailures = 0;
        _lastSuccessUtc = DateTime.UtcNow;
    }
}
|
||||
|
||||
/// <summary>
/// Disposes the underlying gateway client. Prefer <see cref="DisposeAsync"/>.
/// </summary>
/// <remarks>
/// When the client also implements <see cref="IDisposable"/> (as
/// <c>HistorianGatewayClientAdapter</c> does) its synchronous <c>Dispose</c> is used, so this
/// call does not block a thread on an asynchronous operation. Clients that only offer
/// <see cref="IAsyncDisposable"/> fall back to blocking on <see cref="DisposeAsync"/>.
/// </remarks>
public void Dispose()
{
    if (_client is IDisposable synchronousClient)
    {
        synchronousClient.Dispose();
        return;
    }

    // Sync-over-async only as a last resort: the client has no synchronous dispose.
    DisposeAsync().AsTask().GetAwaiter().GetResult();
}
|
||||
|
||||
/// <summary>Asynchronously disposes the underlying gateway client.</summary>
/// <returns>The client's own dispose operation, returned without wrapping.</returns>
public ValueTask DisposeAsync()
{
    return _client.DisposeAsync();
}
|
||||
}
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Runtime.Historian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
/// Host-callable factories for the gateway-backed historian seams, both targeting the single
/// <c>ServerHistorian</c> gateway: <see cref="CreateDataSource"/> builds the read path (the Host's
/// <c>AddServerHistorian</c> wiring) and <see cref="CreateAlarmWriter"/> builds the alarm-write path
/// (the Host's <c>AddAlarmHistorian</c> wiring). The concrete package-client dependency stays inside
/// this driver project — the Host references only the driver, not the package client directly.
/// </summary>
public static class GatewayHistorian
{
    /// <summary>
    /// Builds a <see cref="GatewayHistorianDataSource"/> over a lazily connected
    /// <see cref="HistorianGatewayClientAdapter"/> mapped from the bound
    /// <see cref="ServerHistorianOptions"/>. The <see cref="ILoggerFactory"/> and the data source's
    /// <see cref="ILogger{TCategoryName}"/> are taken from <paramref name="services"/>, with the null
    /// implementations substituted when they are not registered (e.g. minimal test providers).
    /// Performs no network I/O — the underlying channel dials on first use.
    /// </summary>
    /// <param name="options">The bound <c>ServerHistorian</c> configuration.</param>
    /// <param name="services">The resolving service provider (used only to locate logging services).</param>
    /// <returns>The gateway-backed <see cref="IHistorianDataSource"/>.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static IHistorianDataSource CreateDataSource(ServerHistorianOptions options, IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(services);

        var channelLoggers = ResolveLoggerFactory(services);
        var dataSourceLogger = ResolveLogger<GatewayHistorianDataSource>(services);
        var gatewayClient = HistorianGatewayClientAdapter.Create(options, channelLoggers);

        return new GatewayHistorianDataSource(gatewayClient, dataSourceLogger);
    }

    /// <summary>
    /// Builds a <see cref="GatewayAlarmHistorianWriter"/> over a lazily connected
    /// <see cref="HistorianGatewayClientAdapter"/> mapped from the bound
    /// <see cref="ServerHistorianOptions"/> — the <b>same single gateway</b> the read path
    /// (<see cref="CreateDataSource"/>) targets. The Host's <c>AddAlarmHistorian</c> wiring supplies
    /// this as the concrete <see cref="IAlarmHistorianWriter"/> the durable
    /// <c>SqliteStoreAndForwardSink</c> drain worker delegates to, sourcing the connection from the
    /// <c>ServerHistorian</c> section (endpoint/key/TLS) rather than the legacy Wonderware-shaped
    /// <c>AlarmHistorian</c> host/port. The <see cref="ILoggerFactory"/> and the writer's
    /// <see cref="ILogger{TCategoryName}"/> are taken from <paramref name="services"/>, with the null
    /// implementations substituted when absent. Performs no network I/O — the underlying channel
    /// dials on first send.
    /// </summary>
    /// <remarks>
    /// This deliberately constructs its <b>own</b> <see cref="HistorianGatewayClientAdapter"/> — a
    /// second gRPC channel to the same gateway as the read path. Collapsing the two onto one shared
    /// channel would require the container to own a singleton <see cref="IHistorianGatewayClient"/> and
    /// the read-side <see cref="GatewayHistorianDataSource"/> to stop owning + disposing its client,
    /// regressing the read cutover's dispose ownership (and its tests). A second channel to a co-located
    /// sidecar is cheap — the gateway pools and amortizes the underlying historian sessions server-side —
    /// so each path keeps its own channel with a clean, independent lifetime.
    /// </remarks>
    /// <param name="options">The bound <c>ServerHistorian</c> configuration (endpoint, key, TLS posture).</param>
    /// <param name="services">The resolving service provider (used only to locate logging services).</param>
    /// <returns>The gateway-backed <see cref="IAlarmHistorianWriter"/>.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static IAlarmHistorianWriter CreateAlarmWriter(ServerHistorianOptions options, IServiceProvider services)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(services);

        var channelLoggers = ResolveLoggerFactory(services);
        var writerLogger = ResolveLogger<GatewayAlarmHistorianWriter>(services);
        var gatewayClient = HistorianGatewayClientAdapter.Create(options, channelLoggers);

        return new GatewayAlarmHistorianWriter(gatewayClient, writerLogger);
    }

    /// <summary>Returns the registered logger factory, or the no-op factory when none is registered.</summary>
    private static ILoggerFactory ResolveLoggerFactory(IServiceProvider services) =>
        services.GetService<ILoggerFactory>() ?? NullLoggerFactory.Instance;

    /// <summary>Returns the registered typed logger, or the no-op logger when none is registered.</summary>
    private static ILogger<T> ResolveLogger<T>(IServiceProvider services) =>
        services.GetService<ILogger<T>>() ?? NullLogger<T>.Instance;
}
|
||||
@@ -0,0 +1,95 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
/// <see cref="IHistorianProvisioning"/> backed by the HistorianGateway <c>EnsureTags</c> path.
/// Non-historizable driver types are skipped (never built into a definition); the historizable
/// ones are mapped via <see cref="HistorianTypeMapper"/> and batched into a single
/// <c>EnsureTags</c> call.
/// </summary>
/// <remarks>
/// <b>Non-blocking.</b> A historian that is unreachable or errors must never fail an address-space
/// apply, so the gateway call is wrapped in a catch-all: any exception raised by the call counts
/// the whole sent batch as <see cref="HistorianProvisionResult.Failed"/> and returns. Gateway
/// faults are therefore never thrown to the caller; the only exception raised deliberately is the
/// <see cref="ArgumentNullException"/> for a <c>null</c> request list. Tag values, hostnames, and
/// credentials are never logged.
/// </remarks>
public sealed class GatewayTagProvisioner : IHistorianProvisioning
{
    private readonly IHistorianGatewayClient _client;
    private readonly ILogger<GatewayTagProvisioner> _logger;

    /// <summary>Creates the provisioner over a gateway client seam.</summary>
    /// <param name="client">The gateway client used for the <c>EnsureTags</c> path.</param>
    /// <param name="logger">Logger for skip/failure diagnostics (never logs tag values).</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public GatewayTagProvisioner(IHistorianGatewayClient client, ILogger<GatewayTagProvisioner> logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    /// <remarks>
    /// The returned counts always partition the request list: <c>Ensured + Skipped + Failed</c>
    /// equals the number of requests. <c>Ensured</c> is the number of successful result rows the
    /// gateway reported, clamped to the number of definitions actually sent so that an
    /// over-reporting reply (e.g. duplicate result rows) cannot push the total past the request
    /// count.
    /// </remarks>
    public async Task<HistorianProvisionResult> EnsureTagsAsync(
        IReadOnlyList<HistorianTagProvisionRequest> requests, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(requests);

        var definitions = new List<HistorianTagDefinition>(requests.Count);
        var skipped = 0;

        foreach (var request in requests)
        {
            if (!HistorianTypeMapper.IsHistorizable(request.DataType))
            {
                skipped++;
                // Log only the (non-sensitive) data type — never the tag name.
                _logger.LogDebug(
                    "Skipping provisioning of a non-historizable tag of type {DataType}.", request.DataType);
                continue;
            }

            definitions.Add(new HistorianTagDefinition
            {
                TagName = request.TagName,
                DataType = HistorianTypeMapper.ToHistorianDataType(request.DataType),
                // Proto string fields are non-nullable — coalesce absent metadata to empty.
                EngineeringUnit = request.EngineeringUnit ?? string.Empty,
                Description = request.Description ?? string.Empty,
            });
        }

        // Every request was non-historizable — nothing to send. Skip the empty gateway round-trip.
        if (definitions.Count == 0)
        {
            return new HistorianProvisionResult(requests.Count, Ensured: 0, Skipped: skipped, Failed: 0);
        }

        try
        {
            var results = await _client.EnsureTagsAsync(definitions, ct).ConfigureAwait(false);

            // Clamp to the sent count: Failed is derived from it, and without the clamp a reply
            // with more successes than definitions would report Ensured > sent with Failed == 0.
            var reportedSuccesses = results.Results.Count(r => r.Success);
            var ensured = Math.Min(reportedSuccesses, definitions.Count);
            var failed = definitions.Count - ensured;
            return new HistorianProvisionResult(requests.Count, ensured, skipped, failed);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // A plain shutdown cancellation is not a provisioning fault worth a scary Warning. Count
            // the unsent batch as Failed (it didn't land) but log quietly and never throw, keeping
            // the non-blocking contract.
            _logger.LogDebug("Tag provisioning cancelled at shutdown; deferred.");
            return new HistorianProvisionResult(requests.Count, Ensured: 0, Skipped: skipped, Failed: definitions.Count);
        }
        catch (Exception exception)
        {
            // Non-blocking: a failed EnsureTags never fails the apply. Count the whole sent batch as
            // Failed and return; log only the failure category (no tag values).
            _logger.LogWarning(
                "EnsureTags failed for {Count} historian tag(s) ({Exception}); provisioning deferred.",
                definitions.Count, exception.GetType().Name);
            return new HistorianProvisionResult(requests.Count, Ensured: 0, Skipped: skipped, Failed: definitions.Count);
        }
    }
}
|
||||
+126
@@ -0,0 +1,126 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.HistorianGateway.Client;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Runtime.Historian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
/// The production <see cref="IHistorianGatewayClient"/>: a pass-through over the published
/// <see cref="HistorianGatewayClient"/> package client. Every seam method forwards straight to the
/// matching client wrapper — both sides speak the same generated <c>historian_gateway.v1</c> proto
/// types, so no shape translation happens here. The package client's typed exception hierarchy
/// (<c>HistorianGatewayUnavailableException</c> et al.) is allowed to surface unchanged; the
/// <see cref="GatewayHistorianDataSource"/> records it as a health failure and the node manager
/// turns it into a Bad HistoryRead result.
/// </summary>
/// <remarks>
/// <para>
/// <b>Lazy channel.</b> <see cref="Create"/> calls <see cref="HistorianGatewayClient.Create"/>,
/// which constructs a <c>GrpcChannel</c> over a <c>SocketsHttpHandler</c> without opening a
/// connection — the first RPC dials. Constructing the adapter therefore performs no network I/O,
/// which the offline seam tests rely on (they build from bogus endpoints and must not connect).
/// </para>
/// </remarks>
public sealed class HistorianGatewayClientAdapter : IHistorianGatewayClient, IDisposable
{
    private readonly HistorianGatewayClient _inner;

    private HistorianGatewayClientAdapter(HistorianGatewayClient inner)
    {
        _inner = inner;
    }

    /// <summary>
    /// Creates a package client from the bound <see cref="ServerHistorianOptions"/> and wraps it in
    /// an adapter. No connection is opened (lazy channel).
    /// </summary>
    /// <param name="options">The bound <c>ServerHistorian</c> configuration (endpoint, key, TLS posture).</param>
    /// <param name="loggerFactory">Logger factory threaded into the package client's channel diagnostics.</param>
    /// <returns>A ready-to-use adapter whose underlying channel has not yet dialed the gateway.</returns>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public static HistorianGatewayClientAdapter Create(ServerHistorianOptions options, ILoggerFactory loggerFactory)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(loggerFactory);

        var endpoint = new Uri(options.Endpoint);

        // INVERTED mapping: ServerHistorianOptions.AllowUntrustedServerCertificate (opt-in to accept
        // a self-signed cert) is the negation of the client's RequireCertificateValidation. Allowing
        // an untrusted cert == not requiring validation; a pinned CaCertificatePath always verifies.
        var validateServerCertificate = !options.AllowUntrustedServerCertificate;

        var packageClient = HistorianGatewayClient.Create(new HistorianGatewayClientOptions
        {
            Endpoint = endpoint,
            ApiKey = options.ApiKey,
            UseTls = options.UseTls,
            CaCertificatePath = options.CaCertificatePath,
            RequireCertificateValidation = validateServerCertificate,
            DefaultCallTimeout = options.CallTimeout,
            LoggerFactory = loggerFactory,
        });

        return new HistorianGatewayClientAdapter(packageClient);
    }

    /// <inheritdoc />
    public IAsyncEnumerable<HistorianSample> ReadRawAsync(
        string tag, DateTime startUtc, DateTime endUtc, int maxValues, CancellationToken ct)
    {
        return _inner.ReadRawAsync(tag, startUtc, endUtc, maxValues, ct);
    }

    /// <inheritdoc />
    public IAsyncEnumerable<HistorianAggregateSample> ReadAggregateAsync(
        string tag, DateTime startUtc, DateTime endUtc, RetrievalMode mode, TimeSpan interval, CancellationToken ct)
    {
        return _inner.ReadAggregateAsync(tag, startUtc, endUtc, mode, interval, ct);
    }

    /// <inheritdoc />
    public Task<IReadOnlyList<HistorianSample>> ReadAtTimeAsync(
        string tag, IReadOnlyList<DateTime> timestampsUtc, CancellationToken ct)
    {
        return _inner.ReadAtTimeAsync(tag, timestampsUtc, ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// <paramref name="sourceName"/> is rendered into the gateway's one server-filterable predicate —
    /// a <c>Source_Object</c> <see cref="HistorianEventComparison.Equal"/> filter the SQL ReadEvents
    /// path binds as <c>WHERE Source_Object = @source</c>. A <c>null</c> source passes a null filter
    /// (full window). <paramref name="maxEvents"/> is intentionally ignored here: the gateway wire
    /// contract carries no per-call cap, so the cap is enforced upstream by
    /// <see cref="GatewayHistorianDataSource"/> via early stream termination.
    /// </remarks>
    public IAsyncEnumerable<HistorianEvent> ReadEventsAsync(
        string? sourceName, DateTime startUtc, DateTime endUtc, int maxEvents, CancellationToken ct)
    {
        HistorianEventFilter? sourceFilter = null;
        if (sourceName is not null)
        {
            sourceFilter = new HistorianEventFilter
            {
                PropertyName = "Source_Object",
                Comparison = HistorianEventComparison.Equal,
                Value = sourceName,
            };
        }

        return _inner.ReadEventsAsync(startUtc, endUtc, sourceFilter, ct);
    }

    /// <inheritdoc />
    public Task<WriteAck> WriteLiveValuesAsync(
        string tag, IReadOnlyList<HistorianLiveValue> values, CancellationToken ct)
    {
        return _inner.WriteLiveValuesAsync(tag, values, ct);
    }

    /// <inheritdoc />
    public Task<WriteAck> SendEventAsync(HistorianEvent evt, CancellationToken ct)
    {
        return _inner.SendEventAsync(evt, ct);
    }

    /// <inheritdoc />
    public Task<TagOperationResults> EnsureTagsAsync(
        IReadOnlyList<HistorianTagDefinition> definitions, CancellationToken ct)
    {
        return _inner.EnsureTagsAsync(definitions, ct);
    }

    /// <inheritdoc />
    public Task<bool> ProbeAsync(CancellationToken ct)
    {
        return _inner.ProbeAsync(ct);
    }

    /// <inheritdoc />
    public Task<ConnectionStatus> GetConnectionStatusAsync(CancellationToken ct)
    {
        return _inner.GetConnectionStatusAsync(ct);
    }

    /// <summary>Disposes the underlying package client (and its channel). Prefer <see cref="DisposeAsync"/>.</summary>
    public void Dispose()
    {
        _inner.Dispose();
    }

    /// <summary>Asynchronously disposes the underlying package client (and its channel).</summary>
    /// <returns>A task that completes when the client has been disposed.</returns>
    public ValueTask DisposeAsync()
    {
        return _inner.DisposeAsync();
    }
}
|
||||
@@ -0,0 +1,74 @@
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway;
|
||||
|
||||
/// <summary>
/// The slice of the HistorianGateway gRPC client surface that the OtOpcUa historian backend
/// driver consumes. It is proto-typed (the wire contract lives in
/// <c>ZB.MOM.WW.HistorianGateway.Contracts.Grpc</c>); the production implementation is
/// <see cref="HistorianGatewayClientAdapter"/>, which wraps <c>HistorianGatewayClient</c>. The
/// seam exists so the driver and its tests can depend on a fake without a live gateway.
/// </summary>
public interface IHistorianGatewayClient : IAsyncDisposable
{
    /// <summary>Streams raw historian samples for a tag over a time window.</summary>
    IAsyncEnumerable<HistorianSample> ReadRawAsync(
        string tag, DateTime startUtc, DateTime endUtc, int maxValues, CancellationToken ct);

    /// <summary>Streams aggregate samples for a tag using the given retrieval mode and interval.</summary>
    IAsyncEnumerable<HistorianAggregateSample> ReadAggregateAsync(
        string tag, DateTime startUtc, DateTime endUtc, RetrievalMode mode, TimeSpan interval, CancellationToken ct);

    /// <summary>Reads the samples nearest to each of the requested timestamps (unary).</summary>
    Task<IReadOnlyList<HistorianSample>> ReadAtTimeAsync(
        string tag, IReadOnlyList<DateTime> timestampsUtc, CancellationToken ct);

    /// <summary>Streams historian events over a window, optionally filtered to a single source name.</summary>
    /// <param name="sourceName">Optional source-name filter; <c>null</c> returns events from all sources.</param>
    /// <param name="startUtc">Inclusive start of the time window (UTC).</param>
    /// <param name="endUtc">Exclusive end of the time window (UTC).</param>
    /// <param name="maxEvents">
    /// Caps the number of events returned. The gateway wire contract (<c>ReadEventsRequest</c>) has
    /// no per-call server cap, so this limit is enforced client-side by early stream termination:
    /// <c>0</c> or negative means no client-side limit (the gateway may still apply its configured
    /// <c>RuntimeDb:EventReadMaxRows</c>); a positive value stops draining after that many events.
    /// </param>
    /// <param name="ct">Cancellation token.</param>
    IAsyncEnumerable<HistorianEvent> ReadEventsAsync(
        string? sourceName, DateTime startUtc, DateTime endUtc, int maxEvents, CancellationToken ct);

    /// <summary>Writes live values for a tag through the gateway's SQL live-write path.</summary>
    Task<WriteAck> WriteLiveValuesAsync(
        string tag, IReadOnlyList<HistorianLiveValue> values, CancellationToken ct);

    /// <summary>Sends a single historian event.</summary>
    Task<WriteAck> SendEventAsync(HistorianEvent evt, CancellationToken ct);

    /// <summary>Ensures the supplied tag definitions exist (create-or-update).</summary>
    Task<TagOperationResults> EnsureTagsAsync(
        IReadOnlyList<HistorianTagDefinition> definitions, CancellationToken ct);

    /// <summary>Probes gateway/historian reachability.</summary>
    Task<bool> ProbeAsync(CancellationToken ct);

    /// <summary>Reads the gateway's current historian connection status.</summary>
    Task<ConnectionStatus> GetConnectionStatusAsync(CancellationToken ct);
}
|
||||
@@ -0,0 +1,39 @@
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Translates the driver-agnostic <see cref="HistoryAggregateType"/> (OPC UA Part 13 aggregate)
/// into the gateway's native <see cref="RetrievalMode"/>.
/// </summary>
/// <remarks>
/// <para>
/// Average/Minimum/Maximum line up with the legacy Wonderware client's aggregate mapping. The two
/// remaining members are served by <b>native</b> gateway retrieval modes:
/// <see cref="HistoryAggregateType.Total"/> → <see cref="RetrievalMode.Integral"/> and
/// <see cref="HistoryAggregateType.Count"/> → <see cref="RetrievalMode.Counter"/>.
/// </para>
/// <para>
/// This replaces the Wonderware-era client-side workarounds (Total derived as Average × interval,
/// Count approximated from a value count): no client-side scaling is performed any more, so the
/// gateway path is a strict improvement.
/// </para>
/// </remarks>
internal static class AggregateModeMapper
{
    /// <summary>Maps an aggregate function to the gateway retrieval mode.</summary>
    /// <param name="aggregate">The driver-agnostic aggregate function.</param>
    /// <returns>The matching gateway <see cref="RetrievalMode"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException">A future, unmapped enum member (fails the matrix guard).</exception>
    public static RetrievalMode ToRetrievalMode(HistoryAggregateType aggregate)
    {
        switch (aggregate)
        {
            case HistoryAggregateType.Average:
                return RetrievalMode.TimeWeightedAverage;
            case HistoryAggregateType.Minimum:
                return RetrievalMode.MinimumWithTime;
            case HistoryAggregateType.Maximum:
                return RetrievalMode.MaximumWithTime;
            case HistoryAggregateType.Total:
                return RetrievalMode.Integral;
            case HistoryAggregateType.Count:
                return RetrievalMode.Counter;
            default:
                // Fail loudly so a newly added aggregate cannot silently fall through.
                throw new ArgumentOutOfRangeException(
                    nameof(aggregate), aggregate, "Unmapped HistoryAggregateType — add a RetrievalMode mapping.");
        }
    }
}
|
||||
@@ -0,0 +1,47 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Maps a driver-agnostic <see cref="AlarmHistorianEvent"/> onto a gateway wire
/// <see cref="HistorianEvent"/> for the <c>SendEvent</c> write path.
/// </summary>
internal static class AlarmEventMapper
{
    /// <summary>Maps an alarm historian event to a gateway event.</summary>
    /// <remarks>
    /// The alarm's descriptive fields travel as entries of the event's <c>Properties</c> map. An
    /// entry is written only when its value is non-null (<c>AlarmId</c> additionally requires
    /// non-whitespace), so a missing optional field such as the user or message leaves the entry
    /// out instead of failing the whole mapping.
    /// </remarks>
    /// <param name="alarm">The driver-agnostic alarm event.</param>
    /// <returns>The gateway wire event ready for <c>SendEvent</c>.</returns>
    public static HistorianEvent ToHistorianEvent(AlarmHistorianEvent alarm)
    {
        // Timestamp.FromDateTime requires a Utc-kind DateTime; coerce defensively (TimestampUtc is
        // already Utc by contract, but a caller could pass Unspecified).
        var eventTime = Timestamp.FromDateTime(DateTime.SpecifyKind(alarm.TimestampUtc, DateTimeKind.Utc));

        var historianEvent = new HistorianEvent
        {
            // Deliberately DO NOT set HistorianEvent.Id: the gateway's SendEvent path rejects a
            // client-supplied event id (the server handler throws and the call fails permanently —
            // confirmed live). The historian assigns event identity server-side; the alarm's own id
            // is preserved below as a property for read-back correlation/traceability.
            SourceName = alarm.EquipmentPath,
            Type = alarm.AlarmTypeName,
            EventTime = eventTime,
            ReceivedTime = eventTime, // the server re-stamps the received time on the SQL path
        };

        // Proto map<string,string> values must be non-null — only insert non-null properties.
        if (!string.IsNullOrWhiteSpace(alarm.AlarmId))
        {
            historianEvent.Properties["AlarmId"] = alarm.AlarmId;
        }

        AddIfPresent(historianEvent, "AlarmName", alarm.AlarmName);
        AddIfPresent(historianEvent, "EventKind", alarm.EventKind);
        historianEvent.Properties["Severity"] = alarm.Severity.ToString();
        AddIfPresent(historianEvent, "User", alarm.User);
        AddIfPresent(historianEvent, "Message", alarm.Message);
        AddIfPresent(historianEvent, "Comment", alarm.Comment);

        return historianEvent;
    }

    /// <summary>Writes a property entry unless the value is <c>null</c> (empty strings are kept).</summary>
    private static void AddIfPresent(HistorianEvent target, string key, string? value)
    {
        if (value is not null)
        {
            target.Properties[key] = value;
        }
    }
}
|
||||
@@ -0,0 +1,67 @@
|
||||
using System.Globalization;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Converts gateway wire events (<see cref="HistorianEvent"/>) into the driver-agnostic
/// <see cref="HistoricalEvent"/> shape used by the Server's HistoryReadEvents path.
/// </summary>
internal static class EventMapper
{
    /// <summary>Lowest OPC UA severity (Part 9 range is 1 … 1000).</summary>
    private const ushort MinSeverity = 1;

    /// <summary>Highest OPC UA severity (Part 9 range is 1 … 1000).</summary>
    private const ushort MaxSeverity = 1000;

    /// <summary>Converts one gateway event into a historical event.</summary>
    /// <param name="historianEvent">The gateway wire event.</param>
    /// <returns>The driver-agnostic historical event.</returns>
    public static HistoricalEvent ToHistoricalEvent(HistorianEvent historianEvent)
    {
        // A non-empty "Message" property wins; otherwise the event Type is used as a best-effort
        // message, and an empty Type yields no message at all.
        bool hasMessageProperty =
            historianEvent.Properties.TryGetValue("Message", out var propertyText)
            && !string.IsNullOrEmpty(propertyText);
        string? text = hasMessageProperty ? propertyText : NullIfEmpty(historianEvent.Type);

        // Unset proto timestamps map to default(DateTime); set ones convert to Utc-kind values.
        DateTime eventTime = historianEvent.EventTime is { } occurred ? occurred.ToDateTime() : default;
        DateTime receivedTime = historianEvent.ReceivedTime is { } received ? received.ToDateTime() : default;

        return new HistoricalEvent(
            EventId: historianEvent.Id,
            SourceName: NullIfEmpty(historianEvent.SourceName),
            EventTimeUtc: eventTime,
            ReceivedTimeUtc: receivedTime,
            Message: text,
            Severity: ParseSeverity(historianEvent.Properties));
    }

    /// <summary>Converts a sequence of gateway events, preserving their order.</summary>
    /// <param name="events">The gateway wire events.</param>
    /// <returns>The driver-agnostic historical events.</returns>
    public static IReadOnlyList<HistoricalEvent> ToHistoricalEvents(IEnumerable<HistorianEvent> events)
    {
        var mapped = new List<HistoricalEvent>();
        foreach (HistorianEvent wireEvent in events)
        {
            mapped.Add(ToHistoricalEvent(wireEvent));
        }

        return mapped;
    }

    /// <summary>
    /// Reads the severity from the "Severity" property, or from "Priority" when "Severity" is
    /// absent, and clamps it to <c>[1, 1000]</c>. A missing or non-integer value yields 1.
    /// </summary>
    private static ushort ParseSeverity(IDictionary<string, string> properties)
    {
        // "Priority" is consulted only when the "Severity" key does not exist at all.
        if (!properties.TryGetValue("Severity", out string? text))
        {
            properties.TryGetValue("Priority", out text);
        }

        return int.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out int parsed)
            ? (ushort)Math.Clamp(parsed, MinSeverity, MaxSeverity)
            : MinSeverity;
    }

    /// <summary>Returns null for a null/empty string, otherwise the string itself.</summary>
    private static string? NullIfEmpty(string? value) => string.IsNullOrEmpty(value) ? null : value;
}
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Translates a raw OPC DA quality byte (the gateway's <c>opc_quality</c> field) into an OPC UA
/// StatusCode expressed as a <see cref="uint"/>.
/// </summary>
/// <remarks>
/// This table is a byte-identical port of the retired Wonderware client's <c>QualityMapper.Map</c>
/// (itself ported from the historian sidecar's <c>HistorianQualityMapper.Map</c>) and is now the
/// canonical quality table. Its values are pinned by per-byte tests, so they must not be altered
/// casually.
/// </remarks>
internal static class GatewayQualityMapper
{
    // Category buckets, also used as the fallback for quality bytes without a dedicated entry.
    private const uint Good = 0x00000000u;
    private const uint Uncertain = 0x40000000u;
    private const uint Bad = 0x80000000u;

    /// <summary>Maps an OPC DA quality byte to an OPC UA StatusCode.</summary>
    /// <param name="q">The OPC DA quality byte value.</param>
    /// <returns>An OPC UA StatusCode as a uint.</returns>
    public static uint Map(byte q)
    {
        switch (q)
        {
            // Bad family (0-63)
            case 0: return Bad;
            case 4: return 0x80890000u;   // Bad_ConfigurationError
            case 8: return 0x808A0000u;   // Bad_NotConnected
            case 12: return 0x808B0000u;  // Bad_DeviceFailure
            case 16: return 0x808C0000u;  // Bad_SensorFailure
            case 20: return 0x80050000u;  // Bad_CommunicationError
            case 24: return 0x808D0000u;  // Bad_OutOfService
            case 32: return 0x80320000u;  // Bad_WaitingForInitialData

            // Uncertain family (64-191)
            case 64: return Uncertain;
            case 68: return 0x40900000u;  // Uncertain_LastUsableValue
            case 80: return 0x40930000u;  // Uncertain_SensorNotAccurate
            case 84: return 0x40940000u;  // Uncertain_EngineeringUnitsExceeded
            case 88: return 0x40950000u;  // Uncertain_SubNormal

            // Good family (192+)
            case 192: return Good;

            // NOTE(review): kept byte-identical for parity with the legacy table. The OPC UA
            // specification value for Good_LocalOverride appears to be 0x00960000 — confirm
            // before relying on this code being recognised by UA clients.
            case 216: return 0x00D80000u; // Good_LocalOverride
        }

        // No dedicated entry: degrade to the family bucket so callers still get a usable code.
        if (q >= 192)
        {
            return Good;
        }

        return q >= 64 ? Uncertain : Bad;
    }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Translates the driver-agnostic <see cref="DriverDataType"/> into the gateway's
/// <see cref="HistorianDataType"/> used for tag provisioning and historical writes.
/// </summary>
/// <remarks>
/// Nine types are historizable (Boolean plus the integer and floating-point types listed in
/// <see cref="IsHistorizable(DriverDataType)"/>). <see cref="DriverDataType.UInt16"/> is widened to
/// <see cref="HistorianDataType.Uint4"/> because the historian's <c>UInt2</c> write path is deferred
/// upstream. String / DateTime / Reference are not historized in v1 and make
/// <see cref="ToHistorianDataType(DriverDataType)"/> throw <see cref="NotSupportedException"/>;
/// callers that prefer to skip them without catching should check
/// <see cref="IsHistorizable(DriverDataType)"/> first.
/// </remarks>
internal static class HistorianTypeMapper
{
    /// <summary>Returns the historian data type used to provision/write a driver data type.</summary>
    /// <param name="dataType">The driver-agnostic data type.</param>
    /// <returns>The matching <see cref="HistorianDataType"/>.</returns>
    /// <exception cref="NotSupportedException">
    /// The type is explicitly deferred (string/datetime/reference) or is a member this mapper does
    /// not classify.
    /// </exception>
    public static HistorianDataType ToHistorianDataType(DriverDataType dataType)
    {
        switch (dataType)
        {
            case DriverDataType.Boolean:
                return HistorianDataType.Int1;
            case DriverDataType.Int16:
                return HistorianDataType.Int2;
            case DriverDataType.Int32:
                return HistorianDataType.Int4;
            case DriverDataType.Int64:
                return HistorianDataType.Int8;
            case DriverDataType.UInt16:
                // UInt2 write path deferred upstream → widen to the 4-byte unsigned type.
                return HistorianDataType.Uint4;
            case DriverDataType.UInt32:
                return HistorianDataType.Uint4;
            case DriverDataType.UInt64:
                return HistorianDataType.Uint8;
            case DriverDataType.Float32:
                return HistorianDataType.Float;
            case DriverDataType.Float64:
                return HistorianDataType.Double;

            case DriverDataType.String:
            case DriverDataType.DateTime:
            case DriverDataType.Reference:
                throw new NotSupportedException(
                    $"DriverDataType.{dataType} is not historized in v1 " +
                    "(string/datetime/reference writes are deferred — gated on the analog SQL write path).");

            default:
                throw new NotSupportedException(
                    $"DriverDataType.{dataType} is not classified for historian write mapping — add a HistorianDataType mapping.");
        }
    }

    /// <summary>
    /// Reports whether <paramref name="dataType"/> is one of the nine historizable types, so the
    /// provisioning hook can skip the rest without catching <see cref="NotSupportedException"/>.
    /// </summary>
    /// <param name="dataType">The driver-agnostic data type.</param>
    /// <returns><c>true</c> for a historizable type; otherwise <c>false</c>.</returns>
    public static bool IsHistorizable(DriverDataType dataType) =>
        dataType is DriverDataType.Boolean
            or DriverDataType.Int16
            or DriverDataType.Int32
            or DriverDataType.Int64
            or DriverDataType.UInt16
            or DriverDataType.UInt32
            or DriverDataType.UInt64
            or DriverDataType.Float32
            or DriverDataType.Float64;
}
|
||||
@@ -0,0 +1,87 @@
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Mapping;
|
||||
|
||||
/// <summary>
/// Converts gateway wire samples (<see cref="HistorianSample"/> /
/// <see cref="HistorianAggregateSample"/>) into the driver-agnostic
/// <see cref="DataValueSnapshot"/>, following the legacy Wonderware client's <c>ToSnapshots</c> /
/// <c>ToAggregateSnapshots</c> conventions.
/// </summary>
internal static class SampleMapper
{
    private const uint StatusGood = 0x00000000u;
    private const uint StatusBadNoData = 0x800E0000u;

    /// <summary>OPC DA "Good" family floor — a quality byte at/above this carries usable data.</summary>
    private const byte GoodQualityFloor = 192;

    /// <summary>Converts one raw sample into a value snapshot.</summary>
    /// <param name="sample">The gateway raw sample.</param>
    /// <returns>The driver-agnostic snapshot.</returns>
    public static DataValueSnapshot ToSnapshot(HistorianSample sample)
    {
        object? payload = ReadValue(sample);

        // opc_quality is the authoritative OPC DA byte for GatewayQualityMapper; the historian
        // quality field is only used when opc_quality is unset (0).
        byte quality;
        if (sample.OpcQuality == 0)
        {
            quality = (byte)sample.Quality;
        }
        else
        {
            quality = (byte)sample.OpcQuality;
        }

        return new DataValueSnapshot(
            Value: payload,
            StatusCode: GatewayQualityMapper.Map(quality),
            SourceTimestampUtc: sample.Timestamp?.ToDateTime(), // Utc kind
            ServerTimestampUtc: DateTime.UtcNow);
    }

    /// <summary>Converts a sequence of raw samples, preserving their order.</summary>
    /// <param name="samples">The gateway raw samples.</param>
    /// <returns>The driver-agnostic snapshots.</returns>
    public static IReadOnlyList<DataValueSnapshot> ToSnapshots(IEnumerable<HistorianSample> samples)
    {
        var snapshots = new List<DataValueSnapshot>();
        foreach (HistorianSample item in samples)
        {
            snapshots.Add(ToSnapshot(item));
        }

        return snapshots;
    }

    /// <summary>Converts one aggregate bucket into a value snapshot.</summary>
    /// <param name="aggregate">The gateway aggregate sample.</param>
    /// <returns>The driver-agnostic snapshot.</returns>
    /// <remarks>
    /// The gateway proto carries a non-optional <c>double value</c>, so a no-data bucket cannot be
    /// expressed as a null value on the wire (the legacy Wonderware DTO used a nullable
    /// <c>double?</c>). The bucket's <c>opc_quality</c> is used instead: a value of 192 or above
    /// yields the bucket value with a Good status; anything lower yields a null value with
    /// <c>BadNoData</c>. This keeps the Wonderware aggregate contract (binary Good-with-value /
    /// BadNoData-null).
    /// </remarks>
    public static DataValueSnapshot ToAggregateSnapshot(HistorianAggregateSample aggregate)
    {
        if (aggregate.OpcQuality >= GoodQualityFloor)
        {
            return new DataValueSnapshot(
                Value: aggregate.Value, // boxes as System.Double
                StatusCode: StatusGood,
                SourceTimestampUtc: (aggregate.EndTime ?? aggregate.StartTime)?.ToDateTime(), // bucket timestamp
                ServerTimestampUtc: DateTime.UtcNow);
        }

        return new DataValueSnapshot(
            Value: null,
            StatusCode: StatusBadNoData,
            SourceTimestampUtc: (aggregate.EndTime ?? aggregate.StartTime)?.ToDateTime(), // bucket timestamp
            ServerTimestampUtc: DateTime.UtcNow);
    }

    /// <summary>Converts a sequence of aggregate buckets, preserving their order.</summary>
    /// <param name="aggregates">The gateway aggregate samples.</param>
    /// <returns>The driver-agnostic snapshots.</returns>
    public static IReadOnlyList<DataValueSnapshot> ToAggregateSnapshots(IEnumerable<HistorianAggregateSample> aggregates)
    {
        var snapshots = new List<DataValueSnapshot>();
        foreach (HistorianAggregateSample bucket in aggregates)
        {
            snapshots.Add(ToAggregateSnapshot(bucket));
        }

        return snapshots;
    }

    /// <summary>
    /// Picks the sample payload using proto3 explicit presence: the numeric value when present
    /// (boxed as <see cref="double"/>), else the string value when present, else null.
    /// </summary>
    private static object? ReadValue(HistorianSample sample)
    {
        if (sample.HasNumericValue)
        {
            return sample.NumericValue;
        }

        return sample.HasStringValue ? sample.StringValue : null;
    }
}
|
||||
+289
@@ -0,0 +1,289 @@
|
||||
using FASTER.core;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Recorder;
|
||||
|
||||
/// <summary>
/// Crash-safe, FIFO <see cref="IHistorizationOutbox"/> backed by a single FasterLog (append-only
/// persistent log) under <c>&lt;directory&gt;/hlog.log</c>. Maps the outbox onto FasterLog:
/// append → <see cref="FasterLog.EnqueueAsync(byte[], CancellationToken)"/>; peek → forward scan
/// from the logical head; remove → <see cref="FasterLog.TruncateUntil"/> (head advance + reclaim)
/// + commit. In-memory FIFO state (entry id → log start address) is rebuilt from the committed log
/// by a one-pass startup scan, so acked truncations survive an unclean restart.
/// </summary>
/// <remarks>
/// Mirrors the gateway's <c>FasterLogOutboxStore</c> and adds a bounded-capacity drop-oldest
/// policy: when an append would exceed <c>capacity</c>, the head is advanced past the oldest
/// entry (truncate + commit) and <see cref="DroppedCount"/> is incremented. Assumes serialized
/// appends (the recorder actor processes messages sequentially); the lock protects the in-memory
/// index, and FasterLog itself tolerates concurrent enqueue/scan.
/// </remarks>
public sealed class FasterLogHistorizationOutbox : IHistorizationOutbox
{
    private readonly record struct LiveEntry(Guid Id, long Start);

    private readonly ManagedLocalStorageDevice _device;
    private readonly FasterLog _log;
    private readonly Lock _state = new();
    private readonly HistorizationCommitMode _commitMode;
    private readonly int _capacity;

    // Periodic-mode auto-commit machinery (null under PerEntry). The CTS stops the loop, the timer
    // paces it, and the loop Task is retained so Dispose can await it (never leaving an unobserved Task).
    private readonly CancellationTokenSource? _periodicCommitCts;
    private readonly PeriodicTimer? _periodicCommitTimer;
    private readonly Task? _periodicCommitLoop;
    private bool _disposed;

    // FIFO of live (appended-but-not-acked) entries with their FasterLog start addresses, plus an id
    // index for O(1) remove. All three (+ _nextScanAddress, _droppedCount) are read/written under _state.
    private readonly LinkedList<LiveEntry> _live = new();
    private readonly Dictionary<Guid, LinkedListNode<LiveEntry>> _index = new();
    private long _nextScanAddress; // authoritative logical head; peeks scan from here
    private long _droppedCount;

    /// <summary>
    /// Opens (or recovers) the FasterLog-backed outbox under <paramref name="directory"/>.
    /// </summary>
    /// <param name="directory">Directory holding the FasterLog segment + commit files.</param>
    /// <param name="commitMode">
    /// <see cref="HistorizationCommitMode.PerEntry"/> commits before each append returns;
    /// <see cref="HistorizationCommitMode.Periodic"/> commits on a background timer every
    /// <paramref name="commitIntervalMs"/> ms.
    /// </param>
    /// <param name="commitIntervalMs">Periodic-mode commit cadence in ms; must be positive when Periodic.</param>
    /// <param name="capacity">
    /// Maximum un-acked entries before drop-oldest kicks in; <c>0</c> (default) means unbounded.
    /// </param>
    /// <exception cref="ArgumentException"><paramref name="directory"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="capacity"/> is negative, or <paramref name="commitIntervalMs"/> is not positive
    /// in Periodic mode.
    /// </exception>
    public FasterLogHistorizationOutbox(
        string directory,
        HistorizationCommitMode commitMode = HistorizationCommitMode.PerEntry,
        int commitIntervalMs = 100,
        int capacity = 0)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(directory);
        ArgumentOutOfRangeException.ThrowIfNegative(capacity);
        if (commitMode == HistorizationCommitMode.Periodic)
        {
            ArgumentOutOfRangeException.ThrowIfNegativeOrZero(commitIntervalMs);
        }

        Directory.CreateDirectory(directory);
        _commitMode = commitMode;
        _capacity = capacity;
        _device = new ManagedLocalStorageDevice(Path.Combine(directory, "hlog.log"));

        try
        {
            _log = new FasterLog(new FasterLogSettings { LogDevice = _device });
            RecoverState(); // sets _nextScanAddress + rebuilds _live/_index from the committed log
        }
        catch
        {
            // Opening or recovering the log failed (e.g. an undecodable record): release the log and
            // the device so the file handle is not left open behind a half-constructed instance.
            _log?.Dispose();
            _device.Dispose();
            throw;
        }

        if (_commitMode == HistorizationCommitMode.Periodic)
        {
            _periodicCommitCts = new CancellationTokenSource();
            _periodicCommitTimer = new PeriodicTimer(TimeSpan.FromMilliseconds(commitIntervalMs));
            // Started after RecoverState so it never races a half-recovered instance.
            _periodicCommitLoop = RunPeriodicCommitLoopAsync(_periodicCommitTimer, _periodicCommitCts.Token);
        }
    }

    /// <inheritdoc />
    public long DroppedCount
    {
        get
        {
            lock (_state)
            {
                return _droppedCount;
            }
        }
    }

    /// <inheritdoc />
    public async ValueTask AppendAsync(HistorizationOutboxEntry entry, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(entry);

        byte[] payload = HistorizationOutboxEntrySerializer.Serialize(entry);
        long startAddress = await _log.EnqueueAsync(payload, ct).ConfigureAwait(false);

        // Index the record as soon as it is in the log, BEFORE any commit is awaited. If the commit
        // below is cancelled or faults, the record still exists in the log; indexing it first keeps
        // it ack-able through RemoveAsync and keeps CountAsync accurate.
        long? truncateTo = null;
        lock (_state)
        {
            LinkedListNode<LiveEntry> node = _live.AddLast(new LiveEntry(entry.Id, startAddress));
            _index[entry.Id] = node;

            // Drop-oldest on overflow. The new head is the start address of whatever entry survives
            // at the front (or the tail if the log emptied); truncate to the furthest such address.
            while (_capacity > 0 && _live.Count > _capacity)
            {
                LinkedListNode<LiveEntry> oldest = _live.First!;
                truncateTo = oldest.Next?.Value.Start ?? _log.TailAddress;
                _index.Remove(oldest.Value.Id);
                _live.RemoveFirst();
                _droppedCount++;
            }

            if (truncateTo is long head)
            {
                _nextScanAddress = head;
            }
        }

        if (truncateTo is long truncateAddr)
        {
            _log.TruncateUntil(truncateAddr);
        }

        // One commit covers both the new record and any head advance:
        //  - PerEntry: the append must be durable before returning;
        //  - a drop-oldest truncation is made durable in either mode.
        // Periodic appends without a drop skip this — the background timer (and Dispose) commit on
        // their cadence (accepted throughput/latency trade-off).
        if (_commitMode == HistorizationCommitMode.PerEntry || truncateTo is not null)
        {
            await _log.CommitAsync(ct).ConfigureAwait(false);
        }
    }

    /// <inheritdoc />
    public ValueTask<IReadOnlyList<HistorizationOutboxEntry>> PeekBatchAsync(int max, CancellationToken ct)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(max);

        var batch = new List<HistorizationOutboxEntry>(Math.Min(max, 64));
        lock (_state)
        {
            // Scan forward from the logical head; nothing is consumed, so a repeated peek without an
            // intervening RemoveAsync starts from the same head again.
            using FasterLogScanIterator iter = _log.Scan(_nextScanAddress, _log.TailAddress, recover: false);
            while (batch.Count < max && iter.GetNext(out byte[] bytes, out int len, out _, out _))
            {
                batch.Add(HistorizationOutboxEntrySerializer.Deserialize(bytes.AsSpan(0, len)));
            }
        }

        return ValueTask.FromResult<IReadOnlyList<HistorizationOutboxEntry>>(batch);
    }

    /// <inheritdoc />
    public async ValueTask RemoveAsync(Guid id, CancellationToken ct)
    {
        long truncateTo;
        lock (_state)
        {
            if (!_index.TryGetValue(id, out LinkedListNode<LiveEntry>? node))
            {
                return; // unknown / already removed -> defensive no-op
            }

            // The new head is the entry after the target, or the log tail when the target is last.
            truncateTo = node.Next?.Value.Start ?? _log.TailAddress;

            // FIFO ack: remove the target plus any older entries still ahead of it.
            while (_live.First is { } first)
            {
                bool isTarget = ReferenceEquals(first, node);
                _index.Remove(first.Value.Id);
                _live.RemoveFirst();
                if (isTarget)
                {
                    break;
                }
            }

            _nextScanAddress = truncateTo;
        }

        _log.TruncateUntil(truncateTo);
        await _log.CommitAsync(ct).ConfigureAwait(false); // make the head advance durable
    }

    /// <inheritdoc />
    public ValueTask<int> CountAsync(CancellationToken ct)
    {
        lock (_state)
        {
            return ValueTask.FromResult(_live.Count);
        }
    }

    // Rebuild the in-memory FIFO index from the committed log after a restart. The FasterLog ctor has
    // already recovered BeginAddress/TailAddress from the on-disk commit metadata, so scanning
    // [BeginAddress, TailAddress) yields exactly the untruncated (un-acked) records in FIFO order, and
    // BeginAddress is the recovered logical head.
    //
    // CTOR-ONLY: called once before the instance is published and before the periodic-commit loop
    // starts. It unconditionally seeds _nextScanAddress/_live/_index, so it must NEVER run post-ctor.
    //
    // Capacity note: recovery does not enforce _capacity, so it may rebuild _live with MORE than
    // _capacity entries (for example when a drop-oldest truncation had not been committed before a
    // crash). This self-corrects on the next AppendAsync — its drop-oldest while-loop runs until
    // _live.Count <= _capacity, so the overflow converges away.
    private void RecoverState()
    {
        _nextScanAddress = _log.BeginAddress;

        using FasterLogScanIterator iter = _log.Scan(_log.BeginAddress, _log.TailAddress, recover: false);
        while (iter.GetNext(out byte[] bytes, out int len, out long currentAddress, out _))
        {
            HistorizationOutboxEntry entry = HistorizationOutboxEntrySerializer.Deserialize(bytes.AsSpan(0, len));
            LinkedListNode<LiveEntry> node = _live.AddLast(new LiveEntry(entry.Id, currentAddress));
            _index[entry.Id] = node;
        }
    }

    // Periodic-mode auto-commit: best-effort _log.Commit every interval until cancelled. FasterLog
    // commit failures (FasterException) are swallowed so the loop survives transient errors; the
    // per-remove/per-drop commits and Dispose's final spin-wait commit still bound durability.
    private async Task RunPeriodicCommitLoopAsync(PeriodicTimer timer, CancellationToken cancellationToken)
    {
        try
        {
            while (await timer.WaitForNextTickAsync(cancellationToken).ConfigureAwait(false))
            {
                try
                {
                    _log.Commit(spinWait: false);
                }
                catch (FasterException)
                {
                    // Transient/teardown commit failure — keep ticking.
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Expected on Dispose: the CTS cancelled the wait. Normal teardown.
        }
    }

    /// <summary>
    /// Stops the periodic-commit loop (Periodic mode), flushes a final commit (best-effort), and
    /// releases the log + device. Idempotent.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Stop the periodic loop BEFORE the final commit so we don't race the loop's Commit against
        // the teardown commit / log dispose. Await the loop (it absorbs cancellation) so no Task leaks.
        if (_periodicCommitCts is not null)
        {
            _periodicCommitCts.Cancel();
            try
            {
                _periodicCommitLoop?.GetAwaiter().GetResult();
            }
            catch (OperationCanceledException)
            {
                // Cancellation is the expected stop signal — not an error.
            }
            catch (Exception)
            {
                // The loop faulted on a non-Faster commit error during teardown (e.g. an
                // ObjectDisposedException as the device tears down); swallow — Dispose must not
                // throw. Already-committed enqueues remain durable.
            }

            _periodicCommitTimer?.Dispose();
            _periodicCommitCts.Dispose();
        }

        try
        {
            _log.Commit(spinWait: true);
        }
        catch (FasterException)
        {
            // Best-effort final commit on teardown: already-committed enqueues remain durable.
        }

        _log.Dispose();
        _device.Dispose();
    }
}
|
||||
+95
@@ -0,0 +1,95 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Recorder;
|
||||
|
||||
/// <summary>
/// Adapts the gateway client's <c>WriteLiveValues</c> RPC to the Runtime recorder's
/// <see cref="IHistorianValueWriter"/> seam. Maps each <see cref="HistorizationValue"/> onto a
/// proto <see cref="HistorianLiveValue"/> (numeric value + quality, with an optional timestamp —
/// a null timestamp leaves the proto field unset so the gateway's SQL writer server-stamps the
/// current time) and folds the returned <see cref="WriteAck"/> to a single retry/ack boolean.
/// </summary>
/// <remarks>
/// <para>
/// <b>Non-throwing by contract.</b> The recorder's drain loop stays simple by treating the
/// writer as never throwing: any gateway/transport error (and a non-success, non-queued ack)
/// is mapped to <c>false</c> so the recorder retains the outbox entries and retries. Only the
/// failure category (the exception type name) is logged — never tag values, hostnames, or
/// credentials. Argument validation is the one exception: a null/empty tag or a null value
/// list is a caller bug and throws.
/// </para>
/// <para>
/// A success ack OR a store-forward-queued ack maps to <c>true</c>: a value the gateway
/// durably queued must not be re-drained.
/// </para>
/// </remarks>
public sealed class GatewayHistorianValueWriter : IHistorianValueWriter, IAsyncDisposable
{
    private readonly IHistorianGatewayClient _client;
    private readonly ILogger<GatewayHistorianValueWriter> _logger;

    /// <summary>Creates the writer over a gateway client seam.</summary>
    /// <param name="client">The gateway client used for the <c>WriteLiveValues</c> write path.</param>
    /// <param name="logger">Logger for failure-category diagnostics (never logs value content).</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public GatewayHistorianValueWriter(IHistorianGatewayClient client, ILogger<GatewayHistorianValueWriter> logger)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<bool> WriteLiveValuesAsync(
        string tag, IReadOnlyList<HistorizationValue> values, CancellationToken ct)
    {
        ArgumentException.ThrowIfNullOrEmpty(tag);
        ArgumentNullException.ThrowIfNull(values);

        if (values.Count == 0)
        {
            // Nothing to write is a trivially-successful ack — the recorder treats it as drained.
            return true;
        }

        try
        {
            var liveValues = new List<HistorianLiveValue>(values.Count);
            foreach (HistorizationValue value in values)
            {
                var live = new HistorianLiveValue
                {
                    NumericValue = value.Value,
                    Quality = value.Quality,
                };

                if (value.TimestampUtc is { } timestampUtc)
                {
                    // A null timestamp leaves the proto field unset -> the gateway's SQL writer
                    // server-stamps now.
                    live.Timestamp = Timestamp.FromDateTime(ToUtcKind(timestampUtc));
                }

                liveValues.Add(live);
            }

            WriteAck ack = await _client.WriteLiveValuesAsync(tag, liveValues, ct).ConfigureAwait(false);
            return ack.Success || ack.Queued;
        }
        catch (Exception exception)
        {
            // NEVER throw out of the writer — the recorder's drain expects a bool so its retain/retry
            // logic stays simple. Log only the failure category (no value content, hostnames, or creds).
            _logger.LogDebug(
                "WriteLiveValues failed ({Exception}); recorder will retain and retry.",
                exception.GetType().Name);
            return false;
        }
    }

    /// <summary>
    /// Disposes the underlying gateway client (and its gRPC channel). The DI container owns this
    /// writer as a singleton, so this fires once at host shutdown — closing the channel gracefully.
    /// </summary>
    public ValueTask DisposeAsync() => _client.DisposeAsync();

    /// <summary>
    /// Produces the Utc-kind value that <c>Timestamp.FromDateTime</c> requires. A Local-kind value
    /// is converted to UTC (relabelling it would shift the instant by the zone offset); a Utc or
    /// Unspecified value is taken to already be UTC and is only relabelled.
    /// </summary>
    private static DateTime ToUtcKind(DateTime timestamp) =>
        timestamp.Kind == DateTimeKind.Local
            ? timestamp.ToUniversalTime()
            : DateTime.SpecifyKind(timestamp, DateTimeKind.Utc);
}
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
using System.Buffers.Binary;
|
||||
using System.Text;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Recorder;
|
||||
|
||||
/// <summary>
/// Fixed-layout little-endian binary codec for <see cref="HistorizationOutboxEntry"/> records
/// stored in the FasterLog outbox. Every field of the entry is a primitive, so a hand-written
/// layout stays smaller and faster than JSON and needs no reflection at the durable boundary.
/// </summary>
/// <remarks>
/// Wire layout (little-endian): <c>Guid(16) | tagByteLen:int32(4) | tagUtf8(n) | value:double(8) |
/// quality:uint16(2) | timestamp:int64(8)</c>. The timestamp is stored via
/// <see cref="DateTime.ToBinary"/>, which round-trips <see cref="DateTimeKind"/>.
/// </remarks>
internal static class HistorizationOutboxEntrySerializer
{
    // Sizes/offsets of the fixed parts of the record, named so the layout reads off the code.
    private const int IdSize = 16;
    private const int TagLengthSize = sizeof(int);
    private const int TagOffset = IdSize + TagLengthSize;
    private const int ValueSize = sizeof(double);
    private const int QualitySize = sizeof(ushort);
    private const int TimestampSize = sizeof(long);
    private const int TrailerSize = ValueSize + QualitySize + TimestampSize;

    /// <summary>Encodes <paramref name="entry"/> into a freshly allocated byte array using the fixed layout.</summary>
    /// <param name="entry">The outbox entry to encode; must not be null.</param>
    /// <returns>A new array of exactly <c>16 + 4 + tagUtf8Length + 18</c> bytes.</returns>
    public static byte[] Serialize(HistorizationOutboxEntry entry)
    {
        ArgumentNullException.ThrowIfNull(entry);

        int tagByteCount = Encoding.UTF8.GetByteCount(entry.Tag);
        byte[] bytes = new byte[TagOffset + tagByteCount + TrailerSize];
        Span<byte> destination = bytes;

        // Header: id, then the UTF-8 byte length of the tag, then the tag itself.
        entry.Id.TryWriteBytes(destination[..IdSize]);
        BinaryPrimitives.WriteInt32LittleEndian(destination.Slice(IdSize, TagLengthSize), tagByteCount);
        Encoding.UTF8.GetBytes(entry.Tag, destination.Slice(TagOffset, tagByteCount));

        // Trailer: value, quality, timestamp — addressed relative to the end of the tag.
        Span<byte> trailer = destination.Slice(TagOffset + tagByteCount);
        BinaryPrimitives.WriteDoubleLittleEndian(trailer.Slice(0, ValueSize), entry.NumericValue);
        BinaryPrimitives.WriteUInt16LittleEndian(trailer.Slice(ValueSize, QualitySize), entry.Quality);
        BinaryPrimitives.WriteInt64LittleEndian(
            trailer.Slice(ValueSize + QualitySize, TimestampSize),
            entry.TimestampUtc.ToBinary());

        return bytes;
    }

    /// <summary>Decodes a <see cref="HistorizationOutboxEntry"/> from bytes produced by <see cref="Serialize"/>.</summary>
    /// <param name="span">The serialized record. Bytes past the end of the record are ignored.</param>
    /// <returns>The reconstructed entry.</returns>
    /// <remarks>
    /// No explicit validation is performed: a buffer that is too short (or carries a negative tag
    /// length) surfaces as the range exception thrown by the span slicing itself.
    /// </remarks>
    public static HistorizationOutboxEntry Deserialize(ReadOnlySpan<byte> span)
    {
        var id = new Guid(span[..IdSize]);
        int tagByteCount = BinaryPrimitives.ReadInt32LittleEndian(span.Slice(IdSize, TagLengthSize));
        string tag = Encoding.UTF8.GetString(span.Slice(TagOffset, tagByteCount));

        int trailerStart = TagOffset + tagByteCount;
        double numericValue = BinaryPrimitives.ReadDoubleLittleEndian(span.Slice(trailerStart, ValueSize));
        ushort quality = BinaryPrimitives.ReadUInt16LittleEndian(span.Slice(trailerStart + ValueSize, QualitySize));
        long timestampBinary = BinaryPrimitives.ReadInt64LittleEndian(
            span.Slice(trailerStart + ValueSize + QualitySize, TimestampSize));

        return new HistorizationOutboxEntry(id, tag, numericValue, quality, DateTime.FromBinary(timestampBinary));
    }
}
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
<Platforms>AnyCPU;x64</Platforms>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway</RootNamespace>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Abstractions\ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj"/>
|
||||
<!-- Runtime owns ServerHistorianOptions (the bound ServerHistorian config). The read-cutover
|
||||
factory (GatewayHistorian.CreateDataSource / HistorianGatewayClientAdapter.Create) maps
|
||||
those options onto the package client, so the driver references it. Runtime references no
|
||||
driver, so this is a diamond (Host -> {Runtime, Gateway}, Gateway -> Runtime), not a cycle. -->
|
||||
<ProjectReference Include="..\..\Server\ZB.MOM.WW.OtOpcUa.Runtime\ZB.MOM.WW.OtOpcUa.Runtime.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="ZB.MOM.WW.HistorianGateway.Client" />
|
||||
<PackageReference Include="ZB.MOM.WW.HistorianGateway.Contracts" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<!-- FasterLog-backed durable historization outbox (Recorder/FasterLogHistorizationOutbox).
|
||||
Pure-managed FasterLog; same package the gateway's store-forward outbox uses. -->
|
||||
<PackageReference Include="Microsoft.FASTER.Core" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Tests"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
-71
@@ -1,71 +0,0 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
|
||||
/// <summary>
/// Settings that tell <c>WonderwareHistorianClient</c> how to reach and authenticate to the sidecar.
/// </summary>
/// <remarks>
/// <para>
/// <b>Retry / backoff ownership (finding 006):</b> this module performs exactly one
/// in-place transport reconnect inside <c>FrameChannel.InvokeAsync</c> with no delay,
/// and does NOT implement exponential reconnect backoff. Anything broader is up to the
/// caller — the alarm drain worker (<c>Core.AlarmHistorian.SqliteStoreAndForwardSink</c>)
/// and the read-side history router are expected to add their own backoff.
/// </para>
/// </remarks>
/// <param name="Host">Sidecar TCP host (DNS name or IP) the client dials.</param>
/// <param name="Port">Sidecar TCP port (matches the sidecar's <c>OTOPCUA_HISTORIAN_TCP_PORT</c>). Valid range: 1–65535.</param>
/// <param name="SharedSecret">Per-process shared secret the sidecar will verify in the Hello frame.</param>
/// <param name="PeerName">Diagnostic peer identifier sent in Hello — typically the OtOpcUa instance id.</param>
/// <param name="ConnectTimeout">Cap on the TCP connect + Hello round trip on each (re)connect; null selects the 10 s default.</param>
/// <param name="CallTimeout">Cap on a single read/write call once connected; null selects the 30 s default.</param>
public sealed record WonderwareHistorianClientOptions(
    string Host,
    [Range(1, 65535)] int Port,
    string SharedSecret,
    string PeerName = "OtOpcUa",
    TimeSpan? ConnectTimeout = null,
    TimeSpan? CallTimeout = null)
{
    /// <summary>Connect timeout actually applied: the configured value, or 10 seconds when none was given.</summary>
    public TimeSpan EffectiveConnectTimeout
    {
        get { return ConnectTimeout is { } configured ? configured : TimeSpan.FromSeconds(10); }
    }

    /// <summary>Call timeout actually applied: the configured value, or 30 seconds when none was given.</summary>
    public TimeSpan EffectiveCallTimeout
    {
        get { return CallTimeout is { } configured ? configured : TimeSpan.FromSeconds(30); }
    }

    /// <summary>
    /// Timeout, in seconds, for the AdminUI Test Connect probe. The AdminUI clamps to a
    /// 60s server-side maximum; 15 is what the form pre-fills for new instances.
    /// </summary>
    [Display(Name = "Probe timeout (seconds)", Description = "Connection test timeout. Default 15s.", GroupName = "Diagnostics")]
    [Range(1, 60)]
    public int ProbeTimeoutSeconds { get; init; } = 15;

    /// <summary>When true, the TCP stream is wrapped in TLS before the Hello handshake.</summary>
    public bool UseTls { get; init; }

    /// <summary>
    /// Optional SHA-1 thumbprint (40 hex characters, no spaces, case-insensitive) that the
    /// sidecar's TLS server certificate is pinned against. When null/empty and
    /// <see cref="UseTls"/> is true, the certificate chain is validated normally (CA-issued cert).
    /// </summary>
    /// <remarks>
    /// The consumer compares against <c>X509Certificate.GetCertHashString()</c> (SHA-1, 40 hex
    /// chars). A SHA-256 thumbprint (64 hex chars) can never equal that value, so the server
    /// certificate is rejected and the TLS handshake fails. Only 40-character SHA-1 hex
    /// strings are accepted.
    /// </remarks>
    public string? ServerCertThumbprint { get; init; }

    /// <inheritdoc/>
    /// <remarks>
    /// Deliberately omits <see cref="SharedSecret"/> so the secret cannot leak into log output
    /// when the options object is handed to a structured-logging statement.
    /// </remarks>
    public override string ToString()
    {
        string thumbprint = ServerCertThumbprint ?? "<null>";
        return $"WonderwareHistorianClientOptions {{ Host={Host}, Port={Port}, PeerName={PeerName}, UseTls={UseTls}, ServerCertThumbprint={thumbprint} }}";
    }
}
|
||||
-9
@@ -1,9 +0,0 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
<!-- NO PackageReference. NO ProjectReference. -->
|
||||
</Project>
|
||||
-230
@@ -1,230 +0,0 @@
|
||||
using System.Net.Security;
|
||||
using System.Net.Sockets;
|
||||
using System.Security.Authentication;
|
||||
using MessagePack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Internal;
|
||||
|
||||
/// <summary>
/// Owns one TCP connection to the Wonderware historian sidecar. Handles the Hello
/// handshake, serializes outgoing requests + waits for the matching reply frame, and
/// performs a single immediate reconnect-and-retry when the transport fails mid-call.
/// </summary>
/// <remarks>
/// <para>
/// Single in-flight call at a time — the sidecar's TCP protocol is request/response
/// over a single bidirectional stream, so multiple concurrent <see cref="InvokeAsync"/>
/// calls would interleave replies. A <see cref="SemaphoreSlim"/> serializes them. PR 6.x
/// can layer batching on top.
/// </para>
/// <para>
/// No reconnect backoff is implemented here: the one retry inside <see cref="InvokeAsync"/>
/// happens without delay, and any broader retry policy belongs to the caller.
/// </para>
/// <para>
/// Whenever an exchange or handshake fails part-way (I/O error, timeout/cancellation,
/// unexpected frame kind, rejected Hello) the transport is torn down, so a later call always
/// starts from a fresh connection + Hello rather than reading a stale reply off a
/// half-used stream.
/// </para>
/// </remarks>
internal sealed class FrameChannel : IAsyncDisposable
{
    private readonly WonderwareHistorianClientOptions _options;
    private readonly Func<CancellationToken, Task<Stream>> _connect;
    private readonly ILogger _logger;
    private readonly SemaphoreSlim _callGate = new(1, 1);

    private Stream? _stream;
    private FrameReader? _reader;
    private FrameWriter? _writer;
    private bool _disposed;

    /// <summary>
    /// Default TCP factory: connects to the sidecar over TCP, optionally wrapping the stream
    /// in TLS (server-auth; pinned-thumbprint or CA-chain validation). The Hello handshake +
    /// shared secret still authenticate the caller on top of this.
    /// </summary>
    /// <remarks>
    /// <c>EffectiveConnectTimeout</c> bounds both the TCP connect and the TLS handshake, so a
    /// peer that accepts the socket but stalls during TLS cannot hang the caller.
    /// </remarks>
    public static readonly Func<WonderwareHistorianClientOptions, CancellationToken, Task<Stream>> DefaultTcpConnectFactory =
        async (opts, ct) =>
        {
            if (string.IsNullOrWhiteSpace(opts.Host))
                throw new InvalidOperationException("WonderwareHistorianClientOptions.Host is required for the TCP transport.");

            // One timeout budget for the whole transport setup (TCP connect + optional TLS).
            using var connectCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            connectCts.CancelAfter(opts.EffectiveConnectTimeout);

            var tcp = new TcpClient();
            Stream stream;
            try
            {
                await tcp.ConnectAsync(opts.Host, opts.Port, connectCts.Token).ConfigureAwait(false);
                tcp.NoDelay = true;

                // The returned NetworkStream owns the socket (TcpClient.GetStream() uses ownsSocket: true),
                // so FrameChannel.ResetTransport() disposing this stream closes the underlying socket.
                stream = tcp.GetStream();
            }
            catch
            {
                tcp.Dispose();
                throw;
            }

            if (!opts.UseTls) return stream;

            var ssl = new SslStream(stream, leaveInnerStreamOpen: false, (_, cert, _, errors) =>
            {
                // A configured thumbprint pins the exact certificate; otherwise require a clean chain.
                if (!string.IsNullOrEmpty(opts.ServerCertThumbprint))
                    return string.Equals(cert?.GetCertHashString(), opts.ServerCertThumbprint, StringComparison.OrdinalIgnoreCase);
                return errors == SslPolicyErrors.None;
            });
            try
            {
                await ssl.AuthenticateAsClientAsync(
                    new SslClientAuthenticationOptions { TargetHost = opts.Host },
                    connectCts.Token).ConfigureAwait(false);
            }
            catch
            {
                // Disposing the SslStream also disposes the inner NetworkStream (and its socket).
                await ssl.DisposeAsync().ConfigureAwait(false);
                throw;
            }
            return ssl;
        };

    /// <summary>Initializes a new instance of the <see cref="FrameChannel"/> class.</summary>
    /// <param name="options">Configuration options for the historian client.</param>
    /// <param name="connect">Function to establish a connection stream.</param>
    /// <param name="logger">Logger instance for diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public FrameChannel(
        WonderwareHistorianClientOptions options,
        Func<CancellationToken, Task<Stream>> connect,
        ILogger logger)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _connect = connect ?? throw new ArgumentNullException(nameof(connect));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Gets a value indicating whether the channel currently holds a transport whose Hello
    /// handshake was accepted.
    /// </summary>
    public bool IsConnected => _stream is not null;

    /// <summary>
    /// Connects + performs the Hello handshake. Returns when the sidecar has accepted the
    /// hello. Throws on rejection (bad secret, version mismatch, or transport failure).
    /// </summary>
    /// <param name="ct">Cancellation token to stop the operation.</param>
    /// <returns>A task representing the asynchronous connection operation.</returns>
    public async Task ConnectAsync(CancellationToken ct)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        await _callGate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            await ConnectInternalAsync(ct).ConfigureAwait(false);
        }
        finally { _callGate.Release(); }
    }

    /// <summary>
    /// Sends one request, waits for the matching reply. On transport failure, reconnects
    /// once and retries — broader retry policy lives in the calling layer.
    /// </summary>
    /// <typeparam name="TRequest">The type of the request payload.</typeparam>
    /// <typeparam name="TReply">The type of the reply payload.</typeparam>
    /// <param name="requestKind">The message kind of the request.</param>
    /// <param name="expectedReplyKind">The expected message kind of the reply.</param>
    /// <param name="request">The request payload to send.</param>
    /// <param name="cancellationToken">Cancellation token to stop the operation.</param>
    /// <returns>A task that returns the reply payload.</returns>
    /// <exception cref="InvalidDataException">The sidecar answered with an unexpected frame kind.</exception>
    /// <exception cref="OperationCanceledException">
    /// The call was cancelled or exceeded <c>EffectiveCallTimeout</c>.
    /// </exception>
    public async Task<TReply> InvokeAsync<TRequest, TReply>(
        MessageKind requestKind,
        MessageKind expectedReplyKind,
        TRequest request,
        CancellationToken cancellationToken)
        where TReply : class
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // The call timeout covers waiting for the gate, any (re)connect, and the exchange itself.
        using var timeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        timeout.CancelAfter(_options.EffectiveCallTimeout);

        await _callGate.WaitAsync(timeout.Token).ConfigureAwait(false);
        try
        {
            // Lazy connect on first call (or after an earlier failure tore the transport down).
            if (_stream is null) await ConnectInternalAsync(timeout.Token).ConfigureAwait(false);

            try
            {
                return await ExchangeAsync<TRequest, TReply>(requestKind, expectedReplyKind, request, timeout.Token).ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is IOException or EndOfStreamException or ObjectDisposedException)
            {
                _logger.LogWarning(ex, "Sidecar TCP transport failure on {Kind}; reconnecting", requestKind);
                ResetTransport();
                await ConnectInternalAsync(timeout.Token).ConfigureAwait(false);
                // One retry. If the second attempt also fails, propagate.
                return await ExchangeAsync<TRequest, TReply>(requestKind, expectedReplyKind, request, timeout.Token).ConfigureAwait(false);
            }
        }
        finally { _callGate.Release(); }
    }

    /// <summary>
    /// Writes one request frame and reads one reply frame on the current transport. If the
    /// round trip does not complete cleanly the transport is reset before the exception
    /// propagates, because an unanswered request would otherwise leave its reply queued for
    /// the next caller.
    /// </summary>
    private async Task<TReply> ExchangeAsync<TRequest, TReply>(
        MessageKind requestKind, MessageKind expectedReplyKind, TRequest request, CancellationToken ct)
    {
        (MessageKind Kind, byte[] Body) frame;
        try
        {
            await _writer!.WriteAsync(requestKind, request, ct).ConfigureAwait(false);
            frame = await _reader!.ReadFrameAsync(ct).ConfigureAwait(false)
                ?? throw new EndOfStreamException("Sidecar closed connection before reply.");
        }
        catch
        {
            // Covers I/O errors and timeout/cancellation alike: the stream position is unknown.
            ResetTransport();
            throw;
        }

        if (frame.Kind != expectedReplyKind)
        {
            // Request/response pairing is broken; do not reuse this stream.
            ResetTransport();
            throw new InvalidDataException(
                $"Sidecar replied with kind {frame.Kind}; expected {expectedReplyKind}.");
        }

        // The frame was fully consumed, so a deserialization failure here leaves the stream in sync.
        return MessagePackSerializer.Deserialize<TReply>(frame.Body);
    }

    /// <summary>
    /// Replaces the current transport with a fresh connection and runs the Hello handshake,
    /// all within <c>EffectiveConnectTimeout</c>. On any failure the half-built transport is
    /// discarded so <see cref="IsConnected"/> never reports a connection that has not
    /// completed Hello.
    /// </summary>
    private async Task ConnectInternalAsync(CancellationToken ct)
    {
        ResetTransport();

        // ConnectTimeout is documented as the cap on "TCP connect + Hello round trip".
        using var handshakeCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        handshakeCts.CancelAfter(_options.EffectiveConnectTimeout);
        CancellationToken handshakeToken = handshakeCts.Token;

        HelloAck ack;
        try
        {
            Stream stream = await _connect(handshakeToken).ConfigureAwait(false);
            _stream = stream;
            var reader = new FrameReader(stream, leaveOpen: true);
            var writer = new FrameWriter(stream, leaveOpen: true);
            _reader = reader;
            _writer = writer;

            var hello = new Hello
            {
                ProtocolMajor = Hello.CurrentMajor,
                ProtocolMinor = Hello.CurrentMinor,
                PeerName = _options.PeerName,
                SharedSecret = _options.SharedSecret,
            };
            await writer.WriteAsync(MessageKind.Hello, hello, handshakeToken).ConfigureAwait(false);

            var ackFrame = await reader.ReadFrameAsync(handshakeToken).ConfigureAwait(false)
                ?? throw new EndOfStreamException("Sidecar closed connection before HelloAck.");
            if (ackFrame.Kind != MessageKind.HelloAck)
            {
                throw new InvalidDataException($"Sidecar replied to Hello with kind {ackFrame.Kind}; expected HelloAck.");
            }

            ack = MessagePackSerializer.Deserialize<HelloAck>(ackFrame.Body);
            if (!ack.Accepted)
            {
                throw new UnauthorizedAccessException(
                    $"Sidecar rejected Hello: {ack.RejectReason ?? "<no reason>"}.");
            }
        }
        catch
        {
            // Single cleanup point for every handshake failure, including timeouts and I/O errors.
            ResetTransport();
            throw;
        }

        _logger.LogInformation("Sidecar TCP connected — host={Host}", ack.HostName);
    }

    /// <summary>Disposes the writer, reader and stream (if any) and clears the fields. Safe to call repeatedly.</summary>
    private void ResetTransport()
    {
        _writer?.Dispose();
        _reader?.Dispose();
        _stream?.Dispose();
        _writer = null;
        _reader = null;
        _stream = null;
    }

    /// <summary>Releases all resources associated with this channel.</summary>
    /// <returns>A task representing the asynchronous disposal operation.</returns>
    public ValueTask DisposeAsync()
    {
        if (_disposed) return ValueTask.CompletedTask;
        _disposed = true;
        ResetTransport();
        _callGate.Dispose();
        return ValueTask.CompletedTask;
    }
}
|
||||
-42
@@ -1,42 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Internal;
|
||||
|
||||
/// <summary>
/// Translates a raw OPC DA quality byte (Wonderware Historian's <c>OpcQuality</c>) into an
/// OPC UA <c>StatusCode</c> value. Byte-identical port of the sidecar's
/// <c>HistorianQualityMapper.Map</c> — kept in sync via parity tests rather than a
/// shared assembly because the sidecar is .NET 4.8 (x64) and the client is .NET 10 (x64).
/// </summary>
internal static class QualityMapper
{
    /// <summary>Maps an OPC DA quality byte to an OPC UA StatusCode.</summary>
    /// <param name="q">The OPC DA quality byte value.</param>
    /// <returns>
    /// The specific StatusCode for a known quality value; otherwise the generic Good (q ≥ 192),
    /// Uncertain (64 ≤ q &lt; 192) or Bad (q &lt; 64) code for the byte's range.
    /// </returns>
    public static uint Map(byte q)
    {
        switch (q)
        {
            // Good family (192+)
            case 192: return 0x00000000u; // Good
            // NOTE(review): the OPC UA StatusCode table lists Good_LocalOverride as 0x00960000;
            // 0x00D80000 is kept because this table must stay identical to the sidecar — confirm.
            case 216: return 0x00D80000u; // Good_LocalOverride

            // Uncertain family (64-191)
            case 64: return 0x40000000u; // Uncertain
            case 68: return 0x40900000u; // Uncertain_LastUsableValue
            case 80: return 0x40930000u; // Uncertain_SensorNotAccurate
            case 84: return 0x40940000u; // Uncertain_EngineeringUnitsExceeded
            case 88: return 0x40950000u; // Uncertain_SubNormal

            // Bad family (0-63)
            case 0: return 0x80000000u; // Bad
            case 4: return 0x80890000u; // Bad_ConfigurationError
            case 8: return 0x808A0000u; // Bad_NotConnected
            case 12: return 0x808B0000u; // Bad_DeviceFailure
            case 16: return 0x808C0000u; // Bad_SensorFailure
            case 20: return 0x80050000u; // Bad_CommunicationError
            case 24: return 0x808D0000u; // Bad_OutOfService
            case 32: return 0x80320000u; // Bad_WaitingForInitialData
        }

        // Unknown value — fall back to the category bucket so callers still get something usable.
        if (q >= 192)
        {
            return 0x00000000u;
        }

        if (q >= 64)
        {
            return 0x40000000u;
        }

        return 0x80000000u;
    }
}
|
||||
@@ -1,232 +0,0 @@
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
// ============================================================================
|
||||
// Wire DTOs for the sidecar pipe protocol — byte-identical mirror of the
|
||||
// sidecar's Contracts.cs. The sidecar is .NET 4.8 x64; this client is .NET 10
|
||||
// x64. Both ends carry their own copy of these MessagePack DTOs and stay in
|
||||
// sync via the round-trip tests in PR 3.4 + the byte-equality parity test.
|
||||
//
|
||||
// MessagePack [Key] indices MUST match the sidecar's exactly. Adding a field
|
||||
// is an additive change as long as it lands at a fresh index on both sides;
|
||||
// reordering or removing keys is a wire break.
|
||||
//
|
||||
// Timestamps cross the wire as DateTime ticks (long) to dodge MessagePack's
|
||||
// DateTime kind/timezone quirks; both sides convert with DateTime(ticks, Utc).
|
||||
// ============================================================================
|
||||
|
||||
/// <summary>
/// One historical data point on the wire. <see cref="Quality"/> carries the raw OPC DA byte;
/// the client maps it to an OPC UA StatusCode.
/// </summary>
[MessagePackObject]
public sealed class HistorianSampleDto
{
    /// <summary>Value as MessagePack-serialized bytes; the client deserializes per the tag's mx_data_type.</summary>
    [Key(0)]
    public byte[]? ValueBytes { get; set; }

    /// <summary>Raw OPC DA quality byte from the historian SDK (low 8 bits of OpcQuality).</summary>
    [Key(1)]
    public byte Quality { get; set; }

    /// <summary>Gets or sets the sample timestamp as UTC ticks.</summary>
    [Key(2)]
    public long TimestampUtcTicks { get; set; }
}
|
||||
|
||||
/// <summary>
/// One aggregate bucket on the wire. <see cref="Value"/> is null when the aggregate is
/// unavailable for the bucket.
/// </summary>
[MessagePackObject]
public sealed class HistorianAggregateSampleDto
{
    /// <summary>Gets or sets the aggregate value, or null if none is available.</summary>
    [Key(0)]
    public double? Value { get; set; }

    /// <summary>Gets or sets the bucket timestamp as UTC ticks.</summary>
    [Key(1)]
    public long TimestampUtcTicks { get; set; }
}
|
||||
|
||||
/// <summary>A single historian event row as carried on the wire.</summary>
[MessagePackObject]
public sealed class HistorianEventDto
{
    /// <summary>Gets or sets the event identifier.</summary>
    [Key(0)]
    public string EventId { get; set; } = string.Empty;

    /// <summary>Gets or sets the name of the event source, if any.</summary>
    [Key(1)]
    public string? Source { get; set; }

    /// <summary>Gets or sets the time of the event, as UTC ticks.</summary>
    [Key(2)]
    public long EventTimeUtcTicks { get; set; }

    /// <summary>Gets or sets the time the event was received, as UTC ticks.</summary>
    [Key(3)]
    public long ReceivedTimeUtcTicks { get; set; }

    /// <summary>Gets or sets the display text of the event, if any.</summary>
    [Key(4)]
    public string? DisplayText { get; set; }

    /// <summary>Gets or sets the event severity.</summary>
    [Key(5)]
    public ushort Severity { get; set; }
}
|
||||
|
||||
/// <summary>An alarm event to be persisted back into the historian event store.</summary>
[MessagePackObject]
public sealed class AlarmHistorianEventDto
{
    /// <summary>Gets or sets the event identifier.</summary>
    [Key(0)]
    public string EventId { get; set; } = string.Empty;

    /// <summary>Gets or sets the name of the alarm's source.</summary>
    [Key(1)]
    public string SourceName { get; set; } = string.Empty;

    /// <summary>Gets or sets the condition identifier, if any.</summary>
    [Key(2)]
    public string? ConditionId { get; set; }

    /// <summary>Gets or sets the alarm type.</summary>
    [Key(3)]
    public string AlarmType { get; set; } = string.Empty;

    /// <summary>Gets or sets the alarm message, if any.</summary>
    [Key(4)]
    public string? Message { get; set; }

    /// <summary>Gets or sets the alarm severity.</summary>
    [Key(5)]
    public ushort Severity { get; set; }

    /// <summary>Gets or sets the time of the event, as UTC ticks.</summary>
    [Key(6)]
    public long EventTimeUtcTicks { get; set; }

    /// <summary>Gets or sets the acknowledgment comment, if any.</summary>
    [Key(7)]
    public string? AckComment { get; set; }
}
|
||||
|
||||
// ===== Read Raw =====
|
||||
|
||||
/// <summary>Request payload of the "Read Raw" operation.</summary>
[MessagePackObject]
public sealed class ReadRawRequest
{
    /// <summary>Gets or sets the name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Gets or sets the start of the time range, as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Gets or sets the end of the time range, as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Gets or sets the maximum number of values to read.</summary>
    [Key(3)]
    public int MaxValues { get; set; }

    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(4)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply payload of the "Read Raw" operation.</summary>
[MessagePackObject]
public sealed class ReadRawReply
{
    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>Gets or sets a value indicating whether the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Gets or sets the error message when the read failed.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Gets or sets the returned samples; defaults to an empty array.</summary>
    [Key(3)]
    public HistorianSampleDto[] Samples { get; set; } = Array.Empty<HistorianSampleDto>();
}
|
||||
|
||||
// ===== Read Processed =====
|
||||
|
||||
/// <summary>Request payload of the "Read Processed" (aggregate) operation.</summary>
[MessagePackObject]
public sealed class ReadProcessedRequest
{
    /// <summary>Gets or sets the name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Gets or sets the start of the time range, as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Gets or sets the end of the time range, as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Gets or sets the aggregation interval in milliseconds.</summary>
    [Key(3)]
    public double IntervalMs { get; set; }

    /// <summary>
    /// Wonderware AnalogSummary column name: "Average", "Minimum", "Maximum", "ValueCount".
    /// The .NET 10 client maps OPC UA aggregate enum → column.
    /// </summary>
    [Key(4)]
    public string AggregateColumn { get; set; } = string.Empty;

    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(5)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply payload of the "Read Processed" (aggregate) operation.</summary>
[MessagePackObject]
public sealed class ReadProcessedReply
{
    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>Gets or sets a value indicating whether the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Gets or sets the error message when the read failed.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Gets or sets the returned aggregate buckets; defaults to an empty array.</summary>
    [Key(3)]
    public HistorianAggregateSampleDto[] Buckets { get; set; } = Array.Empty<HistorianAggregateSampleDto>();
}
|
||||
|
||||
// ===== Read At-Time =====
|
||||
|
||||
/// <summary>Request payload of the "Read At-Time" operation.</summary>
[MessagePackObject]
public sealed class ReadAtTimeRequest
{
    /// <summary>Gets or sets the name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Gets or sets the requested timestamps, as UTC ticks; defaults to an empty array.</summary>
    [Key(1)]
    public long[] TimestampsUtcTicks { get; set; } = Array.Empty<long>();

    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(2)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply payload of the "Read At-Time" operation.</summary>
[MessagePackObject]
public sealed class ReadAtTimeReply
{
    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>Gets or sets a value indicating whether the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Gets or sets the error message when the read failed.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Gets or sets the returned samples; defaults to an empty array.</summary>
    [Key(3)]
    public HistorianSampleDto[] Samples { get; set; } = Array.Empty<HistorianSampleDto>();
}
|
||||
|
||||
// ===== Read Events =====
|
||||
|
||||
/// <summary>Request payload of the "Read Events" operation.</summary>
[MessagePackObject]
public sealed class ReadEventsRequest
{
    /// <summary>Gets or sets the source name, if any.</summary>
    [Key(0)]
    public string? SourceName { get; set; }

    /// <summary>Gets or sets the start of the time range, as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Gets or sets the end of the time range, as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Gets or sets the maximum number of events to read.</summary>
    [Key(3)]
    public int MaxEvents { get; set; }

    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(4)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply payload of the "Read Events" operation.</summary>
[MessagePackObject]
public sealed class ReadEventsReply
{
    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>Gets or sets a value indicating whether the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Gets or sets the error message when the read failed.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Gets or sets the returned events; defaults to an empty array.</summary>
    [Key(3)]
    public HistorianEventDto[] Events { get; set; } = Array.Empty<HistorianEventDto>();
}
|
||||
|
||||
// ===== Write Alarm Events =====
|
||||
|
||||
/// <summary>Request payload of the "Write Alarm Events" operation.</summary>
[MessagePackObject]
public sealed class WriteAlarmEventsRequest
{
    /// <summary>Gets or sets the alarm events to write; defaults to an empty array.</summary>
    [Key(0)]
    public AlarmHistorianEventDto[] Events { get; set; } = Array.Empty<AlarmHistorianEventDto>();

    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(1)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply payload of the "Write Alarm Events" operation.</summary>
[MessagePackObject]
public sealed class WriteAlarmEventsReply
{
    /// <summary>Gets or sets the correlation identifier.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>Gets or sets a value indicating whether the write succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Gets or sets the error message when the write failed.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Per-event success flag, parallel to <see cref="WriteAlarmEventsRequest.Events"/>.</summary>
    [Key(3)]
    public bool[] PerEventOk { get; set; } = Array.Empty<bool>();

    /// <summary>
    /// Per-event status, parallel to the request's Events: 0=Ack, 1=Retry, 2=Permanent.
    /// Empty ⇒ an older sidecar that only sent <see cref="PerEventOk"/>; the client falls back to it.
    /// </summary>
    [Key(4)]
    public byte[] PerEventStatus { get; set; } = Array.Empty<byte>();
}
|
||||
@@ -1,78 +0,0 @@
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
/// <summary>
/// Pulls length-prefixed, kind-tagged frames off a stream. Intended for a single consumer:
/// do not call <see cref="ReadFrameAsync"/> concurrently on one instance. This type mirrors
/// the sidecar's <c>FrameReader</c> and is kept byte-identical so the wire protocol stays stable.
/// </summary>
public sealed class FrameReader : IDisposable
{
    private readonly Stream _stream;
    private readonly bool _leaveOpen;

    /// <summary>Creates a reader over <paramref name="stream"/>.</summary>
    /// <param name="stream">Stream that frames are read from.</param>
    /// <param name="leaveOpen">
    /// When true the stream is left open by <see cref="Dispose"/>; when false it is disposed.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public FrameReader(Stream stream, bool leaveOpen = false)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        _stream = stream;
        _leaveOpen = leaveOpen;
    }

    /// <summary>Reads the next frame: 4-byte big-endian length, kind byte, then the body.</summary>
    /// <param name="ct">Token that cancels the pending reads.</param>
    /// <returns>
    /// The frame's kind and body bytes, or null when the stream ends cleanly before any byte
    /// of a new frame has been read.
    /// </returns>
    /// <exception cref="InvalidDataException">
    /// The decoded length is negative or above <see cref="Framing.MaxFrameBodyBytes"/>.
    /// </exception>
    /// <exception cref="EndOfStreamException">The stream ended part-way through a frame.</exception>
    public async Task<(MessageKind Kind, byte[] Body)?> ReadFrameAsync(CancellationToken ct)
    {
        var prefix = new byte[Framing.LengthPrefixSize];
        var sawPrefix = await ReadExactAsync(prefix, ct).ConfigureAwait(false);
        if (!sawPrefix)
        {
            // Nothing read at all: the peer closed on a frame boundary.
            return null;
        }

        var bodyLength = System.Buffers.Binary.BinaryPrimitives.ReadInt32BigEndian(prefix);
        if (bodyLength is < 0 or > Framing.MaxFrameBodyBytes)
        {
            throw new InvalidDataException($"Sidecar IPC frame length {bodyLength} out of range.");
        }

        // The kind byte goes through the same async, cancellable path as everything else.
        // A synchronous ReadByte() would pin a thread-pool thread and ignore the
        // call-timeout token if the peer stalls mid-frame (finding 005).
        var kind = new byte[Framing.KindByteSize];
        var sawKind = await ReadExactAsync(kind, ct).ConfigureAwait(false);
        if (!sawKind)
        {
            throw new EndOfStreamException("EOF after length prefix, before kind byte.");
        }

        var payload = new byte[bodyLength];
        var sawBody = await ReadExactAsync(payload, ct).ConfigureAwait(false);
        if (!sawBody)
        {
            throw new EndOfStreamException("EOF mid-frame.");
        }

        return ((MessageKind)kind[0], payload);
    }

    /// <summary>Decodes a frame body from its MessagePack binary form.</summary>
    /// <typeparam name="T">Type to materialize from the body.</typeparam>
    /// <param name="body">Frame body bytes.</param>
    /// <returns>The deserialized instance.</returns>
    public static T Deserialize<T>(byte[] body)
    {
        return MessagePackSerializer.Deserialize<T>(body);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> completely. Returns false if the stream ends before the
    /// first byte arrives, and throws <see cref="EndOfStreamException"/> if it ends after a
    /// partial fill. A zero-length buffer returns true without reading.
    /// </summary>
    private async Task<bool> ReadExactAsync(byte[] buffer, CancellationToken ct)
    {
        for (var filled = 0; filled < buffer.Length;)
        {
            var chunk = await _stream.ReadAsync(buffer.AsMemory(filled), ct).ConfigureAwait(false);
            if (chunk != 0)
            {
                filled += chunk;
                continue;
            }

            if (filled == 0)
            {
                return false;
            }

            throw new EndOfStreamException($"Stream ended after reading {filled} of {buffer.Length} bytes.");
        }

        return true;
    }

    /// <summary>Disposes the underlying stream unless the reader was created with <c>leaveOpen</c> set.</summary>
    public void Dispose()
    {
        if (_leaveOpen)
        {
            return;
        }

        _stream.Dispose();
    }
}
|
||||
@@ -1,64 +0,0 @@
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
/// <summary>
/// Emits length-prefixed, kind-tagged MessagePack frames onto a stream. Writes are
/// serialized through a <see cref="SemaphoreSlim"/> gate so a frame's header and body are
/// never interleaved with another caller's. Byte-identical mirror of the sidecar's FrameWriter.
/// </summary>
public sealed class FrameWriter : IDisposable
{
    private readonly Stream _stream;
    private readonly SemaphoreSlim _gate = new(1, 1);
    private readonly bool _leaveOpen;

    /// <summary>Creates a writer over <paramref name="stream"/>.</summary>
    /// <param name="stream">Stream that frames are written to.</param>
    /// <param name="leaveOpen">When true, <see cref="Dispose"/> leaves the stream open.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public FrameWriter(Stream stream, bool leaveOpen = false)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        _stream = stream;
        _leaveOpen = leaveOpen;
    }

    /// <summary>Serializes <paramref name="message"/> and writes it as one frame, then flushes.</summary>
    /// <typeparam name="T">Type of the message being serialized.</typeparam>
    /// <param name="kind">Kind tag written after the length prefix.</param>
    /// <param name="message">Message to serialize as the frame body.</param>
    /// <param name="ct">Token that cancels serialization, the gate wait, and the writes.</param>
    /// <exception cref="InvalidOperationException">
    /// The serialized body is larger than <see cref="Framing.MaxFrameBodyBytes"/>.
    /// </exception>
    public async Task WriteAsync<T>(MessageKind kind, T message, CancellationToken ct)
    {
        // Serialization and the size check happen before the gate is taken.
        var payload = MessagePackSerializer.Serialize(message, cancellationToken: ct);
        if (payload.Length > Framing.MaxFrameBodyBytes)
        {
            throw new InvalidOperationException(
                $"Sidecar IPC frame body {payload.Length} exceeds {Framing.MaxFrameBodyBytes} byte cap.");
        }

        // Header layout: [4-byte big-endian body length][1-byte message kind].
        // The kind byte rides in the header array so that every write performed while
        // holding the gate is async and cancellable. A synchronous Stream.WriteByte() would
        // block the thread-pool thread and ignore the call-timeout token when the peer's
        // receive window is full (same class of bug as finding 005 on reads).
        var frameHeader = new byte[Framing.LengthPrefixSize + Framing.KindByteSize];
        System.Buffers.Binary.BinaryPrimitives.WriteInt32BigEndian(frameHeader, payload.Length);
        frameHeader[Framing.LengthPrefixSize] = (byte)kind;

        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            await _stream.WriteAsync(frameHeader, ct).ConfigureAwait(false);
            await _stream.WriteAsync(payload, ct).ConfigureAwait(false);
            await _stream.FlushAsync(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Disposes the write gate and, unless <c>leaveOpen</c> was set, the underlying stream.</summary>
    public void Dispose()
    {
        _gate.Dispose();
        if (_leaveOpen)
        {
            return;
        }

        _stream.Dispose();
    }
}
|
||||
@@ -1,48 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
/// <summary>
/// Framing constants for the Wonderware historian sidecar pipe protocol. A frame on the
/// wire is laid out as
/// <c>[4-byte big-endian length][1-byte message kind][MessagePack body]</c>.
/// The length counts the body only; the kind byte is excluded from it.
/// </summary>
/// <remarks>
/// Byte-identical mirror of the sidecar's <c>Driver.Historian.Wonderware.Ipc.Framing</c>.
/// The sidecar targets .NET 4.8 x64 and this client targets .NET 10 x64, so the two cannot
/// share an assembly and the wire constants are duplicated here. PR 3.4 ships round-trip
/// tests that pin the byte-level parity.
/// </remarks>
public static class Framing
{
    /// <summary>Size in bytes of the big-endian body-length prefix.</summary>
    public const int LengthPrefixSize = 4;

    /// <summary>Size in bytes of the message-kind tag that follows the length prefix.</summary>
    public const int KindByteSize = 1;

    /// <summary>16 MiB body cap; protects the receiver from a hostile or buggy peer.</summary>
    public const int MaxFrameBodyBytes = 16 << 20;
}
|
||||
|
||||
/// <summary>
/// One-byte wire tag identifying each historian sidecar message. The values are part of the
/// protocol: never reorder them, and append new contracts at the end. The .NET 10 client and
/// the .NET 4.8 sidecar must agree on every value. Byte-identical with the sidecar enum.
/// </summary>
public enum MessageKind : byte
{
    /// <summary>Handshake frame sent first on every connection.</summary>
    Hello = 0x01,

    /// <summary>Acknowledgment of a <see cref="Hello"/> frame.</summary>
    HelloAck = 0x02,

    /// <summary>Raw-history read request.</summary>
    ReadRawRequest = 0x10,

    /// <summary>Reply to <see cref="ReadRawRequest"/>.</summary>
    ReadRawReply = 0x11,

    /// <summary>Processed (aggregated) history read request.</summary>
    ReadProcessedRequest = 0x12,

    /// <summary>Reply to <see cref="ReadProcessedRequest"/>.</summary>
    ReadProcessedReply = 0x13,

    /// <summary>At-time history read request.</summary>
    ReadAtTimeRequest = 0x14,

    /// <summary>Reply to <see cref="ReadAtTimeRequest"/>.</summary>
    ReadAtTimeReply = 0x15,

    /// <summary>Historical events read request.</summary>
    ReadEventsRequest = 0x16,

    /// <summary>Reply to <see cref="ReadEventsRequest"/>.</summary>
    ReadEventsReply = 0x17,

    /// <summary>Alarm-event write request.</summary>
    WriteAlarmEventsRequest = 0x20,

    /// <summary>Reply to <see cref="WriteAlarmEventsRequest"/>.</summary>
    WriteAlarmEventsReply = 0x21,
}
|
||||
@@ -1,44 +0,0 @@
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
/// <summary>
/// Handshake sent as the first frame of every connection. It advertises the sidecar
/// protocol version and carries the per-process shared secret that the supervisor passed at
/// spawn time. Byte-identical mirror of the sidecar's <c>Hello</c> contract.
/// </summary>
[MessagePackObject]
public sealed class Hello
{
    /// <summary>Protocol major version this client speaks.</summary>
    public const int CurrentMajor = 1;

    /// <summary>Protocol minor version this client speaks.</summary>
    public const int CurrentMinor = 0;

    /// <summary>Protocol major version; defaults to <see cref="CurrentMajor"/>.</summary>
    [Key(0)]
    public int ProtocolMajor { get; set; } = CurrentMajor;

    /// <summary>Protocol minor version; defaults to <see cref="CurrentMinor"/>.</summary>
    [Key(1)]
    public int ProtocolMinor { get; set; } = CurrentMinor;

    /// <summary>Name identifying the connecting client; defaults to an empty string.</summary>
    [Key(2)]
    public string PeerName { get; set; } = "";

    /// <summary>
    /// Per-process shared secret, verified against the value the supervisor passed at spawn time.
    /// </summary>
    [Key(3)]
    public string SharedSecret { get; set; } = "";
}
|
||||
|
||||
/// <summary>
/// Reply to a <see cref="Hello"/> frame. Reports whether the connection was accepted and
/// the remote host name.
/// </summary>
[MessagePackObject]
public sealed class HelloAck
{
    /// <summary>Protocol major version; defaults to <see cref="Hello.CurrentMajor"/>.</summary>
    [Key(0)]
    public int ProtocolMajor { get; set; } = Hello.CurrentMajor;

    /// <summary>Protocol minor version; defaults to <see cref="Hello.CurrentMinor"/>.</summary>
    [Key(1)]
    public int ProtocolMinor { get; set; } = Hello.CurrentMinor;

    /// <summary>True when the connection was accepted.</summary>
    [Key(2)]
    public bool Accepted { get; set; }

    /// <summary>Reason the connection was rejected; may be null.</summary>
    [Key(3)]
    public string? RejectReason { get; set; }

    /// <summary>Host name of the remote server; defaults to an empty string.</summary>
    [Key(4)]
    public string HostName { get; set; } = "";
}
|
||||
-607
@@ -1,607 +0,0 @@
|
||||
using MessagePack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Internal;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
using ClientHistorianEventDto = ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc.HistorianEventDto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
|
||||
/// <summary>
|
||||
/// .NET 10 client for the Wonderware historian sidecar (PR 3.3 protocol). Implements both
|
||||
/// <see cref="IHistorianDataSource"/> (read paths consumed by
|
||||
/// <c>Server.History.IHistoryRouter</c>) and <see cref="IAlarmHistorianWriter"/>
|
||||
/// (alarm-event drain consumed by <c>Core.AlarmHistorian.SqliteStoreAndForwardSink</c>).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The client owns a single <see cref="FrameChannel"/> with one in-flight call at a time;
|
||||
/// concurrent calls serialize on the channel's gate. Reconnect is handled inside the
|
||||
/// channel — transient transport failures retry once before propagating.
|
||||
/// </remarks>
|
||||
public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHistorianWriter, IAsyncDisposable
|
||||
{
|
||||
private readonly FrameChannel _channel;
|
||||
private readonly object _healthLock = new();
|
||||
private DateTime? _lastSuccessUtc;
|
||||
private DateTime? _lastFailureUtc;
|
||||
private string? _lastError;
|
||||
private long _totalQueries;
|
||||
private long _totalSuccesses;
|
||||
private long _totalFailures;
|
||||
private int _consecutiveFailures;
|
||||
|
||||
/// <summary>
/// Builds a client that reaches the Wonderware historian sidecar over TCP, using
/// <c>FrameChannel.DefaultTcpConnectFactory</c> as the connect callback. Tests that need an
/// in-process duplex pair should go through the <see cref="ForTests"/> factory instead.
/// </summary>
/// <param name="options">Connection options for the client.</param>
/// <param name="logger">Logger for diagnostics; optional.</param>
public WonderwareHistorianClient(WonderwareHistorianClientOptions options, ILogger<WonderwareHistorianClient>? logger = null)
    : this(options, cancel => FrameChannel.DefaultTcpConnectFactory(options, cancel), logger)
{
}
|
||||
|
||||
/// <summary>Test seam: builds a client whose transport comes from a caller-supplied connect callback.</summary>
/// <param name="options">Connection options for the client.</param>
/// <param name="connect">Callback that produces the connection stream.</param>
/// <param name="logger">Logger for diagnostics; optional.</param>
/// <returns>A new <see cref="WonderwareHistorianClient"/> wired to <paramref name="connect"/>.</returns>
public static WonderwareHistorianClient ForTests(
    WonderwareHistorianClientOptions options,
    Func<CancellationToken, Task<Stream>> connect,
    ILogger<WonderwareHistorianClient>? logger = null)
{
    return new WonderwareHistorianClient(options, connect, logger);
}
|
||||
|
||||
/// <summary>
/// Shared constructor behind the public constructor and <see cref="ForTests"/>: validates
/// the options and creates the single <see cref="FrameChannel"/> the client owns.
/// </summary>
/// <param name="options">Connection options; must not be null.</param>
/// <param name="connect">Callback that produces the connection stream.</param>
/// <param name="logger">Logger for diagnostics; a no-op logger is used when null.</param>
private WonderwareHistorianClient(
    WonderwareHistorianClientOptions options,
    Func<CancellationToken, Task<Stream>> connect,
    ILogger<WonderwareHistorianClient>? logger)
{
    ArgumentNullException.ThrowIfNull(options);

    ILogger channelLog = logger is null ? NullLogger.Instance : logger;
    _channel = new FrameChannel(options, connect, channelLog);
}
|
||||
|
||||
// ===== IHistorianDataSource =====
|
||||
|
||||
/// <summary>Reads raw historical samples for one tag over a time range.</summary>
/// <param name="fullReference">Full reference path of the tag; sent as the request's tag name.</param>
/// <param name="startUtc">Start of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="endUtc">End of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="maxValuesPerNode">Maximum number of values to return; clamped to <see cref="int.MaxValue"/>.</param>
/// <param name="cancellationToken">Token that cancels the call.</param>
/// <returns>The samples converted to snapshots; the continuation point is always null.</returns>
public async Task<HistoryReadResult> ReadRawAsync(
    string fullReference, DateTime startUtc, DateTime endUtc, uint maxValuesPerNode,
    CancellationToken cancellationToken)
{
    // The wire field is a signed int, so saturate anything above int.MaxValue.
    var cappedMax = maxValuesPerNode > int.MaxValue ? int.MaxValue : (int)maxValuesPerNode;

    var request = new ReadRawRequest
    {
        TagName = fullReference,
        StartUtcTicks = startUtc.Ticks,
        EndUtcTicks = endUtc.Ticks,
        MaxValues = cappedMax,
        CorrelationId = Guid.NewGuid().ToString("N"),
    };

    var response = await InvokeAndClassifyAsync<ReadRawRequest, ReadRawReply>(
        MessageKind.ReadRawRequest,
        MessageKind.ReadRawReply,
        request,
        r => (r.Success, r.Error),
        "ReadRaw",
        cancellationToken).ConfigureAwait(false);

    var snapshots = ToSnapshots(response.Samples);
    return new HistoryReadResult(snapshots, ContinuationPoint: null);
}
|
||||
|
||||
/// <summary>Reads aggregated (processed) historical data for one tag over a time range.</summary>
/// <remarks>
/// <see cref="HistoryAggregateType.Total"/> is not requested from the sidecar directly,
/// because Wonderware AnalogSummary exposes no Total column. Instead the request asks for
/// the time-weighted Average, and each returned bucket value is multiplied by
/// <c>interval.TotalSeconds</c> on the client (Total = Average × interval-seconds).
/// Every other aggregate is passed through unchanged.
/// </remarks>
/// <param name="fullReference">Full reference path of the tag; sent as the request's tag name.</param>
/// <param name="startUtc">Start of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="endUtc">End of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="interval">Aggregation interval; sent in milliseconds.</param>
/// <param name="aggregate">Aggregate to compute.</param>
/// <param name="cancellationToken">Token that cancels the call.</param>
/// <returns>The aggregate buckets converted to snapshots; the continuation point is always null.</returns>
public async Task<HistoryReadResult> ReadProcessedAsync(
    string fullReference, DateTime startUtc, DateTime endUtc, TimeSpan interval,
    HistoryAggregateType aggregate, CancellationToken cancellationToken)
{
    // Total is derived: ask the sidecar for Average and scale the buckets afterwards.
    var deriveTotal = aggregate == HistoryAggregateType.Total;
    var aggregateOnWire = aggregate;
    if (deriveTotal)
    {
        aggregateOnWire = HistoryAggregateType.Average;
    }

    var request = new ReadProcessedRequest
    {
        TagName = fullReference,
        StartUtcTicks = startUtc.Ticks,
        EndUtcTicks = endUtc.Ticks,
        IntervalMs = interval.TotalMilliseconds,
        AggregateColumn = MapAggregate(aggregateOnWire),
        CorrelationId = Guid.NewGuid().ToString("N"),
    };

    var response = await InvokeAndClassifyAsync<ReadProcessedRequest, ReadProcessedReply>(
        MessageKind.ReadProcessedRequest,
        MessageKind.ReadProcessedReply,
        request,
        r => (r.Success, r.Error),
        "ReadProcessed",
        cancellationToken).ConfigureAwait(false);

    var resultBuckets = response.Buckets;
    if (deriveTotal)
    {
        resultBuckets = ScaleAverageToTotal(response.Buckets, interval.TotalSeconds);
    }

    return new HistoryReadResult(ToAggregateSnapshots(resultBuckets), ContinuationPoint: null);
}
|
||||
|
||||
/// <summary>
/// Converts time-weighted Average buckets into <see cref="HistoryAggregateType.Total"/>
/// buckets via Total = Average × interval-seconds. A bucket whose value is null
/// (unavailable) stays null so the downstream null→BadNoData mapping still applies; any
/// other value is multiplied by <paramref name="intervalSeconds"/>. Timestamps are copied
/// as-is. An empty input array is returned unchanged.
/// </summary>
private static HistorianAggregateSampleDto[] ScaleAverageToTotal(
    HistorianAggregateSampleDto[] averages, double intervalSeconds)
{
    if (averages.Length == 0)
    {
        return averages;
    }

    return Array.ConvertAll(averages, bucket => new HistorianAggregateSampleDto
    {
        // Unavailable average stays unavailable; otherwise integrate over the interval.
        Value = bucket.Value.HasValue ? bucket.Value.Value * intervalSeconds : null,
        TimestampUtcTicks = bucket.TimestampUtcTicks,
    });
}
|
||||
|
||||
/// <summary>Reads the value of one tag at each of a set of specific timestamps.</summary>
/// <param name="fullReference">Full reference path of the tag; sent as the request's tag name.</param>
/// <param name="timestampsUtc">Timestamps (UTC) to read values for; their ticks are sent on the wire.</param>
/// <param name="cancellationToken">Token that cancels the call.</param>
/// <returns>
/// One snapshot per requested timestamp, in request order (see
/// <c>AlignAtTimeSnapshots</c>); the continuation point is always null.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="timestampsUtc"/> is null.</exception>
public async Task<HistoryReadResult> ReadAtTimeAsync(
    string fullReference, IReadOnlyList<DateTime> timestampsUtc, CancellationToken cancellationToken)
{
    // Fail with a descriptive argument exception instead of a NullReferenceException on
    // the first dereference below (same guard style as WriteBatchAsync).
    ArgumentNullException.ThrowIfNull(timestampsUtc);

    var ticks = new long[timestampsUtc.Count];
    for (var i = 0; i < timestampsUtc.Count; i++)
    {
        ticks[i] = timestampsUtc[i].Ticks;
    }

    var req = new ReadAtTimeRequest
    {
        TagName = fullReference,
        TimestampsUtcTicks = ticks,
        CorrelationId = Guid.NewGuid().ToString("N"),
    };

    var reply = await InvokeAndClassifyAsync<ReadAtTimeRequest, ReadAtTimeReply>(
        MessageKind.ReadAtTimeRequest, MessageKind.ReadAtTimeReply, req,
        r => (r.Success, r.Error), "ReadAtTime", cancellationToken).ConfigureAwait(false);

    // The reply is re-aligned against the request so gaps and reordering cannot shift values.
    return new HistoryReadResult(AlignAtTimeSnapshots(timestampsUtc, reply.Samples), ContinuationPoint: null);
}
|
||||
|
||||
/// <summary>
/// Lines a <c>ReadAtTime</c> sidecar reply up against the requested timestamps so the
/// <see cref="IHistorianDataSource.ReadAtTimeAsync"/> contract holds: the result has exactly
/// one snapshot per requested timestamp, in request order. The sidecar is not required to
/// return a sample for every timestamp (it may drop boundary-less ones) nor to preserve
/// order, so samples are matched to requests by ticks. A requested timestamp with no
/// matching sample gets a Bad-quality (<c>0x80000000</c>) snapshot instead of letting later
/// values slide into the wrong position.
/// </summary>
private static IReadOnlyList<DataValueSnapshot> AlignAtTimeSnapshots(
    IReadOnlyList<DateTime> timestampsUtc, HistorianSampleDto[] samples)
{
    // Lookup from timestamp ticks to sample. TryAdd keeps the first sample when ticks repeat.
    var samplesByTicks = new Dictionary<long, HistorianSampleDto>(samples.Length);
    foreach (var candidate in samples)
    {
        samplesByTicks.TryAdd(candidate.TimestampUtcTicks, candidate);
    }

    var aligned = new DataValueSnapshot[timestampsUtc.Count];
    for (var slot = 0; slot < timestampsUtc.Count; slot++)
    {
        var stamp = DateTime.SpecifyKind(timestampsUtc[slot], DateTimeKind.Utc);

        if (!samplesByTicks.TryGetValue(stamp.Ticks, out var match))
        {
            // Gap: the sidecar sent nothing for this timestamp. The contract calls for a
            // Bad-quality snapshot stamped at the requested time rather than a missing row.
            aligned[slot] = new DataValueSnapshot(
                Value: null,
                StatusCode: 0x80000000u, // Bad
                SourceTimestampUtc: stamp,
                ServerTimestampUtc: DateTime.UtcNow);
            continue;
        }

        aligned[slot] = new DataValueSnapshot(
            Value: DeserializeSampleValue(match.ValueBytes),
            StatusCode: QualityMapper.Map(match.Quality),
            SourceTimestampUtc: stamp,
            ServerTimestampUtc: DateTime.UtcNow);
    }

    return aligned;
}
|
||||
|
||||
/// <summary>Reads historical events over a time range.</summary>
/// <param name="sourceName">Source name to filter on, or null to read all sources.</param>
/// <param name="startUtc">Start of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="endUtc">End of the range (UTC); its ticks are sent on the wire.</param>
/// <param name="maxEvents">Maximum number of events to return; sent unchanged.</param>
/// <param name="cancellationToken">Token that cancels the call.</param>
/// <returns>The converted events; the continuation point is always null.</returns>
public async Task<HistoricalEventsResult> ReadEventsAsync(
    string? sourceName, DateTime startUtc, DateTime endUtc, int maxEvents,
    CancellationToken cancellationToken)
{
    var request = new ReadEventsRequest
    {
        SourceName = sourceName,
        StartUtcTicks = startUtc.Ticks,
        EndUtcTicks = endUtc.Ticks,
        MaxEvents = maxEvents,
        CorrelationId = Guid.NewGuid().ToString("N"),
    };

    var response = await InvokeAndClassifyAsync<ReadEventsRequest, ReadEventsReply>(
        MessageKind.ReadEventsRequest,
        MessageKind.ReadEventsReply,
        request,
        r => (r.Success, r.Error),
        "ReadEvents",
        cancellationToken).ConfigureAwait(false);

    var events = ToHistoricalEvents(response.Events);
    return new HistoricalEventsResult(events, ContinuationPoint: null);
}
|
||||
|
||||
/// <summary>
/// Returns a snapshot of operation counters and the single TCP channel's connection
/// state.
/// </summary>
/// <remarks>
/// This client owns one TCP channel to the sidecar — it has no notion of
/// separate process / event connections and no per-node telemetry. The single channel's
/// connected state is read once and reported for both
/// <see cref="HistorianHealthSnapshot.ProcessConnectionOpen"/> and
/// <see cref="HistorianHealthSnapshot.EventConnectionOpen"/>, so the two flags always agree
/// within one snapshot.
/// <see cref="HistorianHealthSnapshot.ActiveProcessNode"/> /
/// <see cref="HistorianHealthSnapshot.ActiveEventNode"/> /
/// <see cref="HistorianHealthSnapshot.Nodes"/> are intentionally null/empty. Consumers
/// that need to distinguish two connections should read another driver. (Finding 010.)
/// <para>
/// All seven health fields (TotalQueries, TotalSuccesses, TotalFailures,
/// ConsecutiveFailures, LastSuccessTime, LastFailureTime, LastError) are mutated
/// exclusively under <c>_healthLock</c>, so the snapshot is internally consistent —
/// in particular <c>TotalSuccesses + TotalFailures == TotalQueries</c> at every
/// observed snapshot (a call that has started but not yet completed has not
/// incremented any counter). (Finding 003 / 004.)
/// </para>
/// </remarks>
/// <returns>A point-in-time copy of the health counters and connection state.</returns>
public HistorianHealthSnapshot GetHealthSnapshot()
{
    lock (_healthLock)
    {
        // Sample the channel state a single time: two separate reads could straddle a
        // connect/disconnect and report contradictory flags for what is one connection.
        var connected = _channel.IsConnected;

        return new HistorianHealthSnapshot(
            TotalQueries: _totalQueries,
            TotalSuccesses: _totalSuccesses,
            TotalFailures: _totalFailures,
            ConsecutiveFailures: _consecutiveFailures,
            LastSuccessTime: _lastSuccessUtc,
            LastFailureTime: _lastFailureUtc,
            LastError: _lastError,
            ProcessConnectionOpen: connected,
            EventConnectionOpen: connected,
            ActiveProcessNode: null,
            ActiveEventNode: null,
            Nodes: []);
    }
}
|
||||
|
||||
// ===== IAlarmHistorianWriter =====
|
||||
|
||||
/// <summary>
/// Sends a batch of alarm events to the Wonderware historian through the sidecar and
/// reports one <see cref="HistorianWriteOutcome"/> per input event.
/// </summary>
/// <remarks>
/// <para>
/// <b>Per-event status:</b> if the reply carries the additive
/// <see cref="WriteAlarmEventsReply.PerEventStatus"/> field (0=Ack, 1=Retry, 2=Permanent)
/// with one entry per batch event, each entry maps to
/// <see cref="HistorianWriteOutcome.Ack"/> / <see cref="HistorianWriteOutcome.RetryPlease"/> /
/// <see cref="HistorianWriteOutcome.PermanentFail"/>; unknown codes are treated as retry.
/// The sidecar emits <c>Permanent</c> for structurally-malformed (poison) events so the
/// store-and-forward drain worker dead-letters them at once instead of looping to the
/// retry cap. When that field is absent or the wrong length, the legacy
/// <see cref="WriteAlarmEventsReply.PerEventOk"/> flags are used instead
/// (true→Ack, false or missing→RetryPlease) for rolling-deploy back-compat with older sidecars.
/// </para>
/// <para>
/// <b>Documented boundary:</b> only <i>structurally</i>-malformed events come back as
/// <see cref="HistorianWriteOutcome.PermanentFail"/>. A structurally-valid event that the
/// AAH historian SDK rejects for a deeper, semantic reason still maps to
/// <see cref="HistorianWriteOutcome.RetryPlease"/> (→ retry cap), because the sidecar's
/// writer returns only a transient/persisted boolean for events it actually attempts.
/// Surfacing richer SDK-semantic permanent rejections requires the infra-gated
/// <c>AahClientManagedAlarmEventWriter</c> to report a status code rather than a bool.
/// </para>
/// <para>
/// A whole-call failure (<c>Success=false</c>), or any exception raised while invoking the
/// sidecar or interpreting its reply, yields <see cref="HistorianWriteOutcome.RetryPlease"/>
/// for every event in the batch; the drain worker's backoff controls recovery.
/// </para>
/// </remarks>
/// <param name="batch">Alarm historian events to write.</param>
/// <param name="cancellationToken">Token that cancels the call.</param>
/// <returns>Write outcomes, one per event in <paramref name="batch"/>, in the same order.</returns>
public async Task<IReadOnlyList<HistorianWriteOutcome>> WriteBatchAsync(
    IReadOnlyList<AlarmHistorianEvent> batch, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(batch);
    if (batch.Count == 0)
    {
        return [];
    }

    // Map the domain events to wire DTOs. This sits outside the try, so a mapping
    // failure propagates to the caller rather than being turned into retries.
    var wireEvents = new AlarmHistorianEventDto[batch.Count];
    for (var index = 0; index < batch.Count; index++)
    {
        wireEvents[index] = ToDto(batch[index]);
    }

    var request = new WriteAlarmEventsRequest
    {
        Events = wireEvents,
        CorrelationId = Guid.NewGuid().ToString("N"),
    };

    try
    {
        var reply = await InvokeAsync<WriteAlarmEventsRequest, WriteAlarmEventsReply>(
            MessageKind.WriteAlarmEventsRequest,
            MessageKind.WriteAlarmEventsReply,
            request,
            r => (r.Success, r.Error),
            cancellationToken).ConfigureAwait(false);

        // Whole-call failure: every event in the batch is a transient retry.
        if (!reply.Success)
        {
            return RetryWholeBatch();
        }

        var results = new HistorianWriteOutcome[batch.Count];

        // Use the granular per-event status when the sidecar sent one entry per event
        // (new wire field). Status 2 (Permanent) marks structurally-malformed poison events
        // so they dead-letter immediately rather than retrying to the cap.
        var statuses = reply.PerEventStatus;
        if (statuses is not null && statuses.Length > 0 && statuses.Length == batch.Count)
        {
            for (var index = 0; index < batch.Count; index++)
            {
                results[index] = FromStatus(statuses[index]);
            }

            return results;
        }

        // Legacy path for older sidecars: PerEventOk[i] true → Ack, otherwise RetryPlease.
        // PermanentFail can never be signalled here, so a poison event retries to the
        // drain worker's cap.
        for (var index = 0; index < batch.Count; index++)
        {
            var acked = index < reply.PerEventOk.Length && reply.PerEventOk[index];
            results[index] = acked ? HistorianWriteOutcome.Ack : HistorianWriteOutcome.RetryPlease;
        }

        return results;
    }
    catch
    {
        // Transport / deserialization failure: every event is retry-please and the drain
        // worker's backoff handles recovery. PermanentFail only ever comes from the
        // success path's PerEventStatus mapping, never from a transport failure.
        return RetryWholeBatch();
    }

    // Builds an outcome array marking every event in the batch as RetryPlease.
    HistorianWriteOutcome[] RetryWholeBatch()
    {
        var retries = new HistorianWriteOutcome[batch.Count];
        Array.Fill(retries, HistorianWriteOutcome.RetryPlease);
        return retries;
    }

    // Maps a wire status byte to an outcome; 1 and any unknown code mean retry.
    static HistorianWriteOutcome FromStatus(byte code)
    {
        if (code == 0)
        {
            return HistorianWriteOutcome.Ack;
        }

        if (code == 2)
        {
            return HistorianWriteOutcome.PermanentFail;
        }

        return HistorianWriteOutcome.RetryPlease;
    }
}
|
||||
|
||||
// ===== Constants =====
|
||||
|
||||
/// <summary>
/// Upper bound, in bytes, on a single sample's <c>ValueBytes</c> payload (64 KiB).
/// <see cref="DeserializeSampleValue"/> rejects anything larger before handing the bytes
/// to MessagePack, so a buggy or unexpectedly large peer payload is refused up front.
/// 64 KiB is intended to be well above any primitive historian value.
/// (Finding 007 — NuGetAuditSuppress GHSA-37gx-xxp4-5rgx / GHSA-w3x6-4m5h-cxqf.)
/// </summary>
private const int MaxValueBytesPerSample = 64 * 1024;
|
||||
|
||||
// ===== Helpers =====
|
||||
|
||||
/// <summary>
/// Sends one request through <c>_channel</c> and records exactly one outcome for it via
/// <see cref="RecordOutcome"/>. When the channel returns a reply,
/// <paramref name="evaluate"/> decides whether that reply counts as a success or a
/// failure, so transport and sidecar-level results are recorded in a single
/// <see cref="RecordOutcome"/> call. When the channel call (or
/// <paramref name="evaluate"/>) throws, the call is recorded as a failure carrying the
/// exception message and the exception is rethrown. (Findings 003 / 004.)
/// </summary>
/// <typeparam name="TRequest">Request payload type.</typeparam>
/// <typeparam name="TReply">Reply payload type.</typeparam>
/// <param name="requestKind">Message kind to send.</param>
/// <param name="expectedReplyKind">Message kind the channel should expect in response.</param>
/// <param name="request">Request payload.</param>
/// <param name="evaluate">Classifies a received reply as ok / not-ok with an optional error text.</param>
/// <param name="ct">Cancellation token forwarded to the channel.</param>
/// <returns>The reply returned by the channel, regardless of how it was classified.</returns>
private async Task<TReply> InvokeAsync<TRequest, TReply>(
    MessageKind requestKind, MessageKind expectedReplyKind, TRequest request,
    Func<TReply, (bool ok, string? error)> evaluate, CancellationToken ct)
    where TReply : class
{
    TReply reply;
    try
    {
        reply = await _channel
            .InvokeAsync<TRequest, TReply>(requestKind, expectedReplyKind, request, ct)
            .ConfigureAwait(false);

        // One RecordOutcome call per request: the counters never show a
        // "success-then-undo" intermediate state to a concurrent health snapshot.
        var verdict = evaluate(reply);
        RecordOutcome(verdict.ok, verdict.error);
    }
    catch (Exception failure)
    {
        RecordOutcome(success: false, failure.Message);
        throw;
    }

    return reply;
}
|
||||
|
||||
/// <summary>
/// Convenience wrapper around <see cref="InvokeAsync"/> that throws
/// <see cref="InvalidOperationException"/> when <paramref name="evaluate"/> classifies
/// the reply as a failure. Used by the <see cref="IHistorianDataSource"/> read methods.
/// </summary>
/// <typeparam name="TRequest">Request payload type.</typeparam>
/// <typeparam name="TReply">Reply payload type.</typeparam>
/// <param name="requestKind">Message kind to send.</param>
/// <param name="expectedReplyKind">Message kind expected in response.</param>
/// <param name="request">Request payload.</param>
/// <param name="evaluate">Classifies a received reply as ok / not-ok with an optional error text.</param>
/// <param name="op">Operation name embedded in the failure message.</param>
/// <param name="ct">Cancellation token forwarded to the channel.</param>
/// <returns>The reply, when it was classified as ok.</returns>
/// <exception cref="InvalidOperationException">The reply was classified as a failure.</exception>
private async Task<TReply> InvokeAndClassifyAsync<TRequest, TReply>(
    MessageKind requestKind, MessageKind expectedReplyKind, TRequest request,
    Func<TReply, (bool ok, string? error)> evaluate, string op, CancellationToken ct)
    where TReply : class
{
    // InvokeAsync already runs the classifier to record the outcome. Capture that verdict
    // instead of invoking the caller's delegate a second time on the same reply.
    (bool ok, string? error)? captured = null;

    var reply = await InvokeAsync<TRequest, TReply>(
        requestKind, expectedReplyKind, request,
        candidate =>
        {
            var verdict = evaluate(candidate);
            captured = verdict;
            return verdict;
        },
        ct).ConfigureAwait(false);

    // Fall back to a direct evaluation if the classifier was never invoked.
    var (ok, error) = captured ?? evaluate(reply);
    if (!ok)
    {
        throw new InvalidOperationException(
            $"Sidecar {op} failed: {error ?? "<no message>"}.");
    }

    return reply;
}
|
||||
|
||||
/// <summary>
/// Books the result of one call: bumps <c>_totalQueries</c> plus exactly one of
/// <c>_totalSuccesses</c> / <c>_totalFailures</c>, and updates the consecutive-failure
/// streak and last-success / last-failure bookkeeping — all inside a single
/// <c>_healthLock</c> acquisition. (Findings 003 + 004.)
/// </summary>
/// <param name="success"><c>true</c> to record a success; <c>false</c> to record a failure.</param>
/// <param name="error">Error text stored as the last error; only used when <paramref name="success"/> is <c>false</c>.</param>
private void RecordOutcome(bool success, string? error)
{
    lock (_healthLock)
    {
        _totalQueries++;

        if (!success)
        {
            _totalFailures++;
            _consecutiveFailures++;
            _lastFailureUtc = DateTime.UtcNow;
            _lastError = error;
            return;
        }

        // A success resets the streak; the last recorded error is left untouched.
        _totalSuccesses++;
        _consecutiveFailures = 0;
        _lastSuccessUtc = DateTime.UtcNow;
    }
}
|
||||
|
||||
/// <summary>
/// Turns a sample's raw value bytes into an object via
/// <c>MessagePackSerializer.Deserialize&lt;object&gt;</c> with the serializer's default
/// options. The payload length is checked against <see cref="MaxValueBytesPerSample"/>
/// first, so an oversized payload from a hostile or buggy peer is rejected before any
/// deserialization work happens. (Finding 007.)
/// </summary>
/// <param name="valueBytes">MessagePack-encoded value, or <c>null</c> when the sample carries no value.</param>
/// <returns>The deserialized value, or <c>null</c> when <paramref name="valueBytes"/> is <c>null</c>.</returns>
/// <exception cref="InvalidDataException">The payload exceeds <see cref="MaxValueBytesPerSample"/> bytes.</exception>
private static object? DeserializeSampleValue(byte[]? valueBytes)
{
    if (valueBytes is not { } payload)
    {
        return null;
    }

    if (payload.Length > MaxValueBytesPerSample)
    {
        throw new InvalidDataException(
            $"Sidecar sample ValueBytes length {payload.Length} exceeds the {MaxValueBytesPerSample}-byte cap.");
    }

    // No explicit resolver/options are passed, so MessagePack's defaults apply.
    // NOTE(review): the original comment states the default resolver handles primitive
    // types only (no typeless resolution) — confirm against the MessagePack version in use.
    return MessagePackSerializer.Deserialize<object>(payload);
}
|
||||
|
||||
/// <summary>
/// Projects raw sidecar samples onto <see cref="DataValueSnapshot"/>s, one per input
/// element and in the same order. The value is decoded with
/// <see cref="DeserializeSampleValue"/>, the quality is translated by
/// <c>QualityMapper.Map</c>, the source timestamp is rebuilt from UTC ticks, and the
/// server timestamp is the current UTC time at conversion.
/// </summary>
/// <param name="dtos">Samples received from the sidecar.</param>
/// <returns>The converted snapshots; an empty list when <paramref name="dtos"/> is empty.</returns>
private static IReadOnlyList<DataValueSnapshot> ToSnapshots(HistorianSampleDto[] dtos)
{
    if (dtos.Length == 0)
    {
        return [];
    }

    return Array.ConvertAll(dtos, sample => new DataValueSnapshot(
        Value: DeserializeSampleValue(sample.ValueBytes),
        StatusCode: QualityMapper.Map(sample.Quality),
        SourceTimestampUtc: new DateTime(sample.TimestampUtcTicks, DateTimeKind.Utc),
        ServerTimestampUtc: DateTime.UtcNow));
}
|
||||
|
||||
/// <summary>
/// Converts sidecar aggregate samples into <see cref="DataValueSnapshot"/>s, one per
/// input element and in the same order. A sample with a null value is reported with the
/// OPC UA <c>BadNoData</c> status; any other sample is reported as <c>Good</c>.
/// </summary>
/// <param name="dtos">Aggregate samples received from the sidecar.</param>
/// <returns>The converted snapshots; an empty list when <paramref name="dtos"/> is empty.</returns>
private static IReadOnlyList<DataValueSnapshot> ToAggregateSnapshots(HistorianAggregateSampleDto[] dtos)
{
    // OPC UA StatusCode values (OPC 10000-6, Annex A). The literal previously used here,
    // 0x800E0000, is BadServerHalted in that table; BadNoData is 0x809B0000.
    // NOTE(review): if other components compare against the old literal, update them too.
    const uint Good = 0x00000000u;
    const uint BadNoData = 0x809B0000u;

    if (dtos.Length == 0) return [];

    var snapshots = new DataValueSnapshot[dtos.Length];
    for (var i = 0; i < dtos.Length; i++)
    {
        var dto = dtos[i];

        // Null aggregate value → BadNoData per Core.Abstractions HistoryReadResult convention.
        snapshots[i] = new DataValueSnapshot(
            Value: dto.Value,
            StatusCode: dto.Value is null ? BadNoData : Good,
            SourceTimestampUtc: new DateTime(dto.TimestampUtcTicks, DateTimeKind.Utc),
            ServerTimestampUtc: DateTime.UtcNow);
    }

    return snapshots;
}
|
||||
|
||||
/// <summary>
/// Projects sidecar event DTOs onto <see cref="HistoricalEvent"/>s, one per input
/// element and in the same order. Both timestamps are rebuilt from UTC ticks; the DTO's
/// <c>Source</c> becomes the source name and its <c>DisplayText</c> becomes the message.
/// </summary>
/// <param name="dtos">Event DTOs received from the sidecar.</param>
/// <returns>The converted events; an empty list when <paramref name="dtos"/> is empty.</returns>
private static IReadOnlyList<HistoricalEvent> ToHistoricalEvents(ClientHistorianEventDto[] dtos)
{
    if (dtos.Length == 0)
    {
        return [];
    }

    return Array.ConvertAll(dtos, source => new HistoricalEvent(
        EventId: source.EventId,
        SourceName: source.Source,
        EventTimeUtc: new DateTime(source.EventTimeUtcTicks, DateTimeKind.Utc),
        ReceivedTimeUtc: new DateTime(source.ReceivedTimeUtcTicks, DateTimeKind.Utc),
        Message: source.DisplayText,
        Severity: source.Severity));
}
|
||||
|
||||
/// <summary>
/// Builds the wire DTO for one alarm event. The alarm id, equipment path and alarm name
/// travel as <c>EventId</c>, <c>SourceName</c> and <c>ConditionId</c>; <c>AlarmType</c>
/// is the alarm type name and the event kind joined by a colon; the severity is
/// translated by <see cref="MapSeverity"/>; the timestamp is sent as raw ticks.
/// </summary>
/// <param name="evt">The alarm event to convert.</param>
/// <returns>The populated DTO.</returns>
private static AlarmHistorianEventDto ToDto(AlarmHistorianEvent evt)
{
    var combinedAlarmType = evt.AlarmTypeName + ":" + evt.EventKind;

    return new AlarmHistorianEventDto
    {
        EventId = evt.AlarmId,
        SourceName = evt.EquipmentPath,
        ConditionId = evt.AlarmName,
        AlarmType = combinedAlarmType,
        Message = evt.Message,
        Severity = MapSeverity(evt.Severity),
        EventTimeUtcTicks = evt.TimestampUtc.Ticks,
        AckComment = evt.Comment,
    };
}
|
||||
|
||||
/// <summary>
/// Translates an <see cref="AlarmSeverity"/> bucket into the numeric severity sent on the
/// wire: Low 250, Medium 500, High 700, Critical 900. Any other value maps to 500.
/// </summary>
/// <param name="severity">The severity bucket.</param>
/// <returns>The numeric severity.</returns>
private static ushort MapSeverity(AlarmSeverity severity)
{
    switch (severity)
    {
        case AlarmSeverity.Low:
            return 250;
        case AlarmSeverity.High:
            return 700;
        case AlarmSeverity.Critical:
            return 900;
        case AlarmSeverity.Medium:
        default:
            // Unrecognised buckets share Medium's value.
            return 500;
    }
}
|
||||
|
||||
/// <summary>
/// Resolves the Wonderware AnalogSummary column name for an OPC UA aggregate. No Total
/// column exists — <see cref="HistoryAggregateType.Total"/> is derived client-side in
/// <see cref="ReadProcessedAsync"/> by requesting Average, so it never reaches this method.
/// </summary>
/// <param name="aggregate">The aggregate to translate.</param>
/// <returns>The column name for <paramref name="aggregate"/>.</returns>
/// <exception cref="NotSupportedException">The aggregate has no column mapping.</exception>
private static string MapAggregate(HistoryAggregateType aggregate)
{
    switch (aggregate)
    {
        case HistoryAggregateType.Average:
            return "Average";
        case HistoryAggregateType.Minimum:
            return "Minimum";
        case HistoryAggregateType.Maximum:
            return "Maximum";
        case HistoryAggregateType.Count:
            // The count column carries a different name than the aggregate.
            return "ValueCount";
        default:
            throw new NotSupportedException($"Unknown HistoryAggregateType {aggregate}");
    }
}
|
||||
|
||||
/// <summary>Asynchronously disposes the client by disposing its underlying channel.</summary>
/// <returns>The channel's disposal operation; it completes when the channel has been disposed.</returns>
public ValueTask DisposeAsync()
{
    return _channel.DisposeAsync();
}
|
||||
|
||||
/// <summary>
/// Synchronous dispose required by <see cref="IDisposable"/> on
/// <see cref="IHistorianDataSource"/>. Blocks the calling thread until the channel's
/// asynchronous disposal has finished, so it is not strictly non-blocking (socket
/// teardown can wait briefly on OS handle release). The blocking bridge is considered
/// deadlock-safe on the premise that the channel cleanup neither awaits a captured
/// <see cref="System.Threading.SynchronizationContext"/> nor takes a lock the caller
/// could hold. (Finding 010.)
/// </summary>
public void Dispose()
{
    var pendingDisposal = _channel.DisposeAsync().AsTask();
    pendingDisposal.GetAwaiter().GetResult();
}
|
||||
}
|
||||
-93
@@ -1,93 +0,0 @@
|
||||
using System.Diagnostics;
|
||||
using System.Net.Sockets;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Internal;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Ipc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
|
||||
/// <summary>
/// TCP-connect probe for the <see cref="WonderwareHistorianClientOptions"/>-shaped driver
/// config. Opens a stream to the configured <c>Host:Port</c> through
/// <see cref="FrameChannel.DefaultTcpConnectFactory"/> (the same connect path the runtime
/// client uses, including TLS when <c>UseTls</c> is set), then sends a <see cref="Hello"/>
/// with the configured shared secret and confirms the sidecar's <see cref="HelloAck"/> is
/// accepted — an end-to-end reachability + auth check. Reports success with the measured
/// latency, or a failure message on timeout / connection refused / TLS failure /
/// rejected Hello.
/// </summary>
public sealed class WonderwareHistorianDriverProbe : IDriverProbe
{
    /// <summary>
    /// Lenient JSON settings for the driver config: case-insensitive property names,
    /// unknown members skipped, enums accepted as strings.
    /// </summary>
    private static readonly JsonSerializerOptions _opts = new()
    {
        PropertyNameCaseInsensitive = true,
        UnmappedMemberHandling = JsonUnmappedMemberHandling.Skip,
        Converters = { new JsonStringEnumConverter() },
    };

    /// <inheritdoc />
    public string DriverType => "Historian.Wonderware";

    /// <inheritdoc />
    /// <remarks>
    /// Never throws for probe failures: invalid config, connect errors, timeouts and
    /// handshake rejections are all reported through the returned result. A positive
    /// <paramref name="timeout"/> is enforced by the probe itself in addition to
    /// <paramref name="ct"/>, so the probe cannot outlive its deadline even when the
    /// caller's token carries none.
    /// </remarks>
    public async Task<DriverProbeResult> ProbeAsync(string configJson, TimeSpan timeout, CancellationToken ct)
    {
        WonderwareHistorianClientOptions? opts;
        try { opts = JsonSerializer.Deserialize<WonderwareHistorianClientOptions>(configJson, _opts); }
        catch (Exception ex) { return new(false, $"Config JSON is invalid: {ex.Message}", null); }
        if (opts is null) return new(false, "Config JSON deserialized to null.", null);

        if (string.IsNullOrWhiteSpace(opts.Host) || opts.Port <= 0)
            return new(false, "Config has no host/port to probe.", null);

        // Link a local deadline to the caller's token. Previously `timeout` only appeared
        // in the error text, so a caller token without a deadline could hang the probe.
        // Non-positive timeouts, and values CancelAfter cannot represent, add no deadline.
        using var deadline = CancellationTokenSource.CreateLinkedTokenSource(ct);
        if (timeout > TimeSpan.Zero && timeout.TotalMilliseconds <= int.MaxValue)
            deadline.CancelAfter(timeout);
        var probeCt = deadline.Token;

        var sw = Stopwatch.StartNew();
        Stream? stream = null;
        try
        {
            // Reuse the runtime connect factory so the probe exercises the exact TCP + TLS
            // (pinned-thumbprint or CA-chain) path the client uses in production.
            stream = await FrameChannel.DefaultTcpConnectFactory(opts, probeCt).ConfigureAwait(false);

            // leaveOpen: the stream is owned (and disposed) by the finally block below.
            using var reader = new FrameReader(stream, leaveOpen: true);
            using var writer = new FrameWriter(stream, leaveOpen: true);

            var hello = new Hello
            {
                ProtocolMajor = Hello.CurrentMajor,
                ProtocolMinor = Hello.CurrentMinor,
                PeerName = opts.PeerName,
                SharedSecret = opts.SharedSecret,
            };
            await writer.WriteAsync(MessageKind.Hello, hello, probeCt).ConfigureAwait(false);

            var ackFrame = await reader.ReadFrameAsync(probeCt).ConfigureAwait(false)
                ?? throw new EndOfStreamException("Sidecar closed connection before HelloAck.");
            if (ackFrame.Kind != MessageKind.HelloAck)
                return new(false, $"Sidecar replied to Hello with kind {ackFrame.Kind}; expected HelloAck.", null);

            var ack = FrameReader.Deserialize<HelloAck>(ackFrame.Body);
            if (!ack.Accepted)
                return new(false, $"Sidecar rejected Hello: {ack.RejectReason ?? "<no reason>"}.", null);

            sw.Stop();
            return new(true, $"Connected to {opts.Host}:{opts.Port} (tls={opts.UseTls})", sw.Elapsed);
        }
        catch (SocketException ex)
        {
            return new(false, $"Connect failed: {ex.SocketErrorCode}", null);
        }
        catch (OperationCanceledException)
        {
            // Covers both the local deadline and cancellation of the caller's token; the
            // message is unchanged so callers that cancel `ct` at the deadline read the same.
            return new(false, $"Probe timed out after {timeout.TotalSeconds:F0}s.", null);
        }
        catch (Exception ex)
        {
            return new(false, ex.Message, null);
        }
        finally
        {
            if (stream is not null) await stream.DisposeAsync().ConfigureAwait(false);
        }
    }
}
|
||||
-30
@@ -1,30 +0,0 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and build settings for the Wonderware historian client library. -->
  <PropertyGroup>
    <RootNamespace>ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client</RootNamespace>
    <TargetFramework>net10.0</TargetFramework>
    <Platforms>AnyCPU;x64</Platforms>
    <LangVersion>latest</LangVersion>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <!-- Warnings fail the build; XML docs are generated, with CS1591
         (missing XML comment on a public member) suppressed. -->
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <GenerateDocumentationFile>true</GenerateDocumentationFile>
    <NoWarn>$(NoWarn);CS1591</NoWarn>
  </PropertyGroup>

  <!-- NuGet packages. No Version attribute is given here; presumably versions are
       supplied centrally (e.g. Directory.Packages.props) — confirm. -->
  <ItemGroup>
    <PackageReference Include="MessagePack"/>
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions"/>
  </ItemGroup>

  <!-- Sibling projects: the IPC contracts plus the Core abstractions and alarm-historian projects. -->
  <ItemGroup>
    <ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Contracts.csproj"/>
    <ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Abstractions\ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj"/>
    <ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj"/>
  </ItemGroup>

  <!-- Lets the test assembly reach internal members. -->
  <ItemGroup>
    <InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.Tests"/>
  </ItemGroup>

</Project>
|
||||
-117
@@ -1,117 +0,0 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// IPC-side <see cref="IAlarmEventWriter"/> implementation that delegates to an
/// <see cref="IAlarmHistorianWriteBackend"/> (production: aahClientManaged-bound)
/// and maps the trinary <see cref="AlarmHistorianWriteOutcome"/> down to the
/// <c>bool[]</c> the IPC reply contract carries. Per-event outcomes:
/// <list type="bullet">
///   <item><description><see cref="AlarmHistorianWriteOutcome.Ack"/> → <c>true</c> (drop from sender's queue).</description></item>
///   <item><description><see cref="AlarmHistorianWriteOutcome.RetryPlease"/> → <c>false</c> (sender retries on next drain tick).</description></item>
///   <item><description><see cref="AlarmHistorianWriteOutcome.PermanentFail"/> → <c>false</c> (sender's B.4 widens the IPC bool back into the trinary outcome by inspecting structured diagnostics; this slot intentionally collapses to "not-ok" at the wire).</description></item>
/// </list>
/// </summary>
public sealed class AahClientManagedAlarmEventWriter : IAlarmEventWriter
{
    private static readonly ILogger Log = Serilog.Log.ForContext<AahClientManagedAlarmEventWriter>();

    private readonly IAlarmHistorianWriteBackend _backend;

    /// <summary>
    /// Initializes a new instance of the AahClientManagedAlarmEventWriter class.
    /// </summary>
    /// <param name="backend">The alarm historian write backend to delegate to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="backend"/> is null.</exception>
    public AahClientManagedAlarmEventWriter(IAlarmHistorianWriteBackend backend)
    {
        _backend = backend ?? throw new ArgumentNullException(nameof(backend));
    }

    /// <summary>
    /// Writes a batch of alarm historian events through the backend.
    /// </summary>
    /// <param name="events">The alarm events to write.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// One flag per input event, in input order: <c>true</c> only where the backend
    /// reported <see cref="AlarmHistorianWriteOutcome.Ack"/>. An empty array is returned
    /// for a null or empty batch. If the backend throws (other than cancellation), or
    /// returns null or an outcome array of the wrong length, every flag is <c>false</c>.
    /// </returns>
    /// <exception cref="OperationCanceledException">The backend call was cancelled.</exception>
    public async Task<bool[]> WriteAsync(AlarmHistorianEventDto[] events, CancellationToken cancellationToken)
    {
        if (events is null || events.Length == 0)
        {
            return Array.Empty<bool>();
        }

        AlarmHistorianWriteOutcome[] outcomes;
        try
        {
            outcomes = await _backend.WriteBatchAsync(events, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (!(ex is OperationCanceledException))
        {
            // Backend-level failure (cluster unreachable, transport error). Treat the
            // whole batch as RetryPlease so the sender's queue holds the rows for
            // the next drain tick — preferable to dropping them on a transient.
            // Cancellation is excluded by the filter and propagates to the caller.
            Log.Warning(ex,
                "Alarm historian backend WriteBatchAsync threw — marking entire {Count}-event batch RetryPlease.",
                events.Length);
            return new bool[events.Length];
        }

        // A null result is treated like a wrong-length result; it would otherwise
        // surface as a NullReferenceException and bypass the degrade path below.
        var returnedCount = outcomes == null ? 0 : outcomes.Length;
        if (outcomes == null || returnedCount != events.Length)
        {
            // Backend contract violation — defensive degrade so a bug in the backend
            // doesn't desync the sender's queue accounting. Treat as RetryPlease.
            Log.Warning(
                "Alarm historian backend returned {ReturnedCount} outcomes for a batch of {InputCount} events; degrading to RetryPlease for the whole batch.",
                returnedCount, events.Length);
            return new bool[events.Length];
        }

        var perEventOk = new bool[outcomes.Length];
        for (var i = 0; i < outcomes.Length; i++)
        {
            perEventOk[i] = outcomes[i] == AlarmHistorianWriteOutcome.Ack;
        }
        return perEventOk;
    }

    /// <summary>
    /// Translate the outcome of a single SDK call (raw HRESULT + diagnostic) into the
    /// trinary <see cref="AlarmHistorianWriteOutcome"/>. Exposed for the production
    /// <see cref="SdkAlarmHistorianWriteBackend"/> to share the mapping with tests.
    /// </summary>
    /// <param name="hresult">The HRESULT code from the SDK call.</param>
    /// <param name="isCommunicationError">Indicates whether the error is a communication-class error.</param>
    /// <param name="isMalformedInput">Indicates whether the input was malformed.</param>
    /// <returns>
    /// <see cref="AlarmHistorianWriteOutcome.PermanentFail"/> for malformed input;
    /// otherwise <see cref="AlarmHistorianWriteOutcome.Ack"/> when
    /// <paramref name="hresult"/> is zero; otherwise
    /// <see cref="AlarmHistorianWriteOutcome.RetryPlease"/>.
    /// </returns>
    public static AlarmHistorianWriteOutcome MapOutcome(int hresult, bool isCommunicationError, bool isMalformedInput)
    {
        // Order matters: malformed input is permanent regardless of HRESULT pattern;
        // communication-class errors are transient regardless of which specific
        // HRESULT bit fired.
        if (isMalformedInput)
        {
            return AlarmHistorianWriteOutcome.PermanentFail;
        }
        if (hresult == 0)
        {
            return AlarmHistorianWriteOutcome.Ack;
        }
        if (isCommunicationError)
        {
            return AlarmHistorianWriteOutcome.RetryPlease;
        }

        // Default: unknown HRESULT failure — be conservative and let the sender retry.
        // The sender's drain worker has its own dead-letter cap so a permanently-broken
        // event won't loop forever.
        return AlarmHistorianWriteOutcome.RetryPlease;
    }
}
|
||||
}
|
||||
-19
@@ -1,19 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Result of writing one event through
/// <see cref="IAlarmHistorianWriteBackend.WriteBatchAsync"/>. This is the sidecar-local
/// counterpart of <c>Core.AlarmHistorian.HistorianWriteOutcome</c>: the sidecar runs
/// net48 and cannot reference the net10 Core project. The IPC contract narrows each slot
/// to a <c>bool</c>, and the lmxopcua-side consumer widens it back into the trinary
/// outcome at the IPC boundary (PR B.4).
/// </summary>
public enum AlarmHistorianWriteOutcome
{
    /// <summary>The historian accepted the event; it can be dropped from the store-and-forward queue.</summary>
    Ack = 0,

    /// <summary>Transient failure (server busy, disconnected, timeout); keep the event queued and retry on the next drain tick.</summary>
    RetryPlease = 1,

    /// <summary>Permanent failure (malformed event, unrecoverable SDK error); the lmxopcua side moves the event to dead-letter.</summary>
    PermanentFail = 2,
}
|
||||
}
|
||||
-148
@@ -1,148 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Thread-safe, pure-logic endpoint picker for the Wonderware Historian cluster. Tracks which
/// configured nodes are healthy, places failed nodes in a time-bounded cooldown, and hands
/// out an ordered list of eligible candidates for the data source to try in sequence.
/// All state is guarded by a single private lock; time comes from an injectable clock.
/// </summary>
internal sealed class HistorianClusterEndpointPicker
{
    private readonly Func<DateTime> _clock;
    private readonly TimeSpan _cooldown;
    private readonly object _lock = new object();
    private readonly List<NodeEntry> _nodes;

    /// <summary>Initializes the picker with default system clock.</summary>
    /// <param name="config">Historian configuration.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public HistorianClusterEndpointPicker(HistorianConfiguration config)
        : this(config, () => DateTime.UtcNow) { }

    /// <summary>Initializes the picker with custom clock function.</summary>
    /// <param name="config">Historian configuration.</param>
    /// <param name="clock">Clock function for testing.</param>
    /// <exception cref="ArgumentNullException"><paramref name="clock"/> or <paramref name="config"/> is null.</exception>
    internal HistorianClusterEndpointPicker(HistorianConfiguration config, Func<DateTime> clock)
    {
        _clock = clock ?? throw new ArgumentNullException(nameof(clock));
        // Fail with a named argument instead of a NullReferenceException on first use.
        if (config == null) throw new ArgumentNullException(nameof(config));

        // Negative cooldowns are clamped to zero, which disables cooldown entirely.
        _cooldown = TimeSpan.FromSeconds(Math.Max(0, config.FailureCooldownSeconds));

        // Use the configured cluster list when it has entries; otherwise the single ServerName.
        var names = (config.ServerNames != null && config.ServerNames.Count > 0)
            ? config.ServerNames
            : new List<string> { config.ServerName };

        // Drop blanks, trim, and de-duplicate case-insensitively, preserving order.
        _nodes = names
            .Where(n => !string.IsNullOrWhiteSpace(n))
            .Select(n => n.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Select(n => new NodeEntry { Name = n })
            .ToList();
    }

    /// <summary>Gets the total count of configured nodes.</summary>
    public int NodeCount
    {
        get { lock (_lock) return _nodes.Count; }
    }

    /// <summary>Gets the names of the nodes that are not in cooldown right now, in configured order.</summary>
    /// <returns>A new list containing the healthy node names.</returns>
    public IReadOnlyList<string> GetHealthyNodes()
    {
        lock (_lock)
        {
            var now = _clock();
            return _nodes.Where(n => IsHealthyAt(n, now)).Select(n => n.Name).ToList();
        }
    }

    /// <summary>Gets the count of currently healthy nodes.</summary>
    public int HealthyNodeCount
    {
        get
        {
            lock (_lock)
            {
                var now = _clock();
                return _nodes.Count(n => IsHealthyAt(n, now));
            }
        }
    }

    /// <summary>
    /// Marks a node as failed and starts its cooldown. Unknown node names are ignored.
    /// With a zero cooldown the failure is recorded but the node stays eligible.
    /// </summary>
    /// <param name="node">Node name (matched case-insensitively).</param>
    /// <param name="error">Optional error message.</param>
    public void MarkFailed(string node, string? error)
    {
        lock (_lock)
        {
            var entry = FindEntry(node);
            if (entry == null) return;

            var now = _clock();
            entry.FailureCount++;
            entry.LastError = error;
            entry.LastFailureTime = now;
            entry.CooldownUntil = _cooldown.TotalMilliseconds > 0 ? now + _cooldown : (DateTime?)null;
        }
    }

    /// <summary>
    /// Marks a node as healthy by clearing its cooldown. The failure count, last error and
    /// last failure time are kept. Unknown node names are ignored.
    /// </summary>
    /// <param name="node">Node name (matched case-insensitively).</param>
    public void MarkHealthy(string node)
    {
        lock (_lock)
        {
            var entry = FindEntry(node);
            if (entry == null) return;
            entry.CooldownUntil = null;
        }
    }

    /// <summary>Returns a snapshot of all node states, in configured order.</summary>
    /// <returns>A new list with one state object per configured node.</returns>
    public List<HistorianClusterNodeState> SnapshotNodeStates()
    {
        lock (_lock)
        {
            var now = _clock();
            return _nodes.Select(n =>
            {
                // Evaluate health once so IsHealthy and CooldownUntil are derived from the same answer.
                var healthy = IsHealthyAt(n, now);
                return new HistorianClusterNodeState
                {
                    Name = n.Name,
                    IsHealthy = healthy,
                    // An expired cooldown is reported as "no cooldown".
                    CooldownUntil = healthy ? null : n.CooldownUntil,
                    FailureCount = n.FailureCount,
                    LastError = n.LastError,
                    LastFailureTime = n.LastFailureTime
                };
            }).ToList();
        }
    }

    /// <summary>A node is healthy when it has no cooldown or its cooldown has expired at <paramref name="now"/>.</summary>
    private static bool IsHealthyAt(NodeEntry entry, DateTime now)
    {
        return entry.CooldownUntil == null || entry.CooldownUntil <= now;
    }

    /// <summary>Finds a node by case-insensitive name; returns null when it is not configured.</summary>
    private NodeEntry? FindEntry(string node)
    {
        for (var i = 0; i < _nodes.Count; i++)
            if (string.Equals(_nodes[i].Name, node, StringComparison.OrdinalIgnoreCase))
                return _nodes[i];
        return null;
    }

    /// <summary>Mutable per-node bookkeeping; only accessed under the picker's lock.</summary>
    private sealed class NodeEntry
    {
        /// <summary>Gets or sets the node name.</summary>
        public string Name { get; set; } = "";

        /// <summary>Gets or sets when cooldown expires.</summary>
        public DateTime? CooldownUntil { get; set; }

        /// <summary>Gets or sets the failure count.</summary>
        public int FailureCount { get; set; }

        /// <summary>Gets or sets the last error message.</summary>
        public string? LastError { get; set; }

        /// <summary>Gets or sets the last failure time.</summary>
        public DateTime? LastFailureTime { get; set; }
    }
}
|
||||
}
|
||||
-29
@@ -1,29 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Snapshot of one historian cluster node at a single moment. A
/// <see cref="HistorianHealthSnapshot"/> carries one of these per configured node.
/// </summary>
public sealed class HistorianClusterNodeState
{
    /// <summary>Name of the node; defaults to the empty string.</summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>Whether the node was considered healthy when the snapshot was taken.</summary>
    public bool IsHealthy { get; set; }

    /// <summary>When the node leaves cooldown, or <c>null</c> when no cooldown applies.</summary>
    public DateTime? CooldownUntil { get; set; }

    /// <summary>Number of failures recorded for the node.</summary>
    public int FailureCount { get; set; }

    /// <summary>Most recent error message, if any.</summary>
    public string? LastError { get; set; }

    /// <summary>Time of the most recent failure, if any.</summary>
    public DateTime? LastFailureTime { get; set; }
}
|
||||
}
|
||||
-49
@@ -1,49 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Settings for the Wonderware Historian SDK. Per the original notes, the values are
/// filled in from environment variables when the sidecar starts (see <c>Program.cs</c>):
/// the supervisor (lmxopcua-side <c>WonderwareHistorianClient</c>) launches the sidecar
/// with those variables set. OPC UA translation happens on the client side of the TCP
/// IPC, so nothing here depends on OPC UA. The legacy v1 Galaxy.Host / Proxy host that
/// used to own this type was retired in PR 7.2.
/// </summary>
public sealed class HistorianConfiguration
{
    /// <summary>Whether Historian integration is switched on. Defaults to <c>false</c>.</summary>
    public bool Enabled { get; set; }

    /// <summary>Node used when <see cref="ServerNames"/> has no entries. Defaults to <c>localhost</c>.</summary>
    public string ServerName { get; set; } = "localhost";

    /// <summary>
    /// Cluster nodes in preference order. When the list has entries, the data source
    /// attempts each node in turn on connect and moves on to the next after a failure;
    /// a node that failed sits in cooldown for <see cref="FailureCooldownSeconds"/>
    /// before it becomes eligible again. Defaults to an empty list.
    /// </summary>
    public List<string> ServerNames { get; set; } = new List<string>();

    /// <summary>Cooldown applied to a failed node, in seconds. Defaults to 60.</summary>
    public int FailureCooldownSeconds { get; set; } = 60;

    /// <summary>Whether integrated security is used. Defaults to <c>true</c>.</summary>
    public bool IntegratedSecurity { get; set; } = true;

    /// <summary>User name for authentication, if any.</summary>
    public string? UserName { get; set; }

    /// <summary>Password for authentication, if any.</summary>
    public string? Password { get; set; }

    /// <summary>Historian server port. Defaults to 32568.</summary>
    public int Port { get; set; } = 32568;

    /// <summary>Command timeout, in seconds. Defaults to 30.</summary>
    public int CommandTimeoutSeconds { get; set; } = 30;

    /// <summary>Maximum number of values returned by a single read. Defaults to 10000.</summary>
    public int MaxValuesPerRead { get; set; } = 10000;

    /// <summary>
    /// Outer safety timeout, in seconds, for sync-over-async Historian operations. It
    /// should be comfortably larger than <see cref="CommandTimeoutSeconds"/>. Defaults to 60.
    /// </summary>
    public int RequestTimeoutSeconds { get; set; } = 60;
}
|
||||
}
|
||||
-863
@@ -1,863 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using StringCollection = System.Collections.Specialized.StringCollection;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ArchestrA;
|
||||
using Serilog;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
|
||||
/// Reads historical data from the Wonderware Historian via the aahClientManaged SDK.
|
||||
/// OPC-UA-free — emits <see cref="HistorianSample"/>/<see cref="HistorianAggregateSample"/>
|
||||
/// which the sidecar serialises onto the TCP wire (PR 3.3 contracts) for the
|
||||
/// .NET 10 <c>WonderwareHistorianClient</c> to translate into OPC UA <c>DataValue</c>
|
||||
/// on its side of the IPC. The v1 Galaxy.Host / Proxy architecture this class
|
||||
/// originally lived in retired in PR 7.2.
|
||||
/// </summary>
|
||||
public sealed class HistorianDataSource : IHistorianDataSource
|
||||
{
|
||||
// Serilog logger scoped to this type.
private static readonly ILogger Log = Serilog.Log.ForContext<HistorianDataSource>();
|
||||
|
||||
// Configuration supplied at construction; also the template for per-node config clones.
private readonly HistorianConfiguration _config;
|
||||
// Guards publishing / clearing of _connection.
private readonly object _connectionLock = new object();
|
||||
// Guards publishing / clearing of _eventConnection.
private readonly object _eventConnectionLock = new object();
|
||||
// Opens SDK connections; injectable through the internal constructor.
private readonly IHistorianConnectionFactory _factory;
|
||||
// Shared connection used by the raw / aggregate / at-time reads; null while closed.
private HistorianAccess? _connection;
|
||||
// Shared connection used by the event reads; null while closed.
private HistorianAccess? _eventConnection;
|
||||
// Set by Dispose; checked before (and again while) opening a connection.
private bool _disposed;
|
||||
|
||||
// Guards the health counters and the active-node strings below.
private readonly object _healthLock = new object();
|
||||
private long _totalSuccesses;
|
||||
private long _totalFailures;
|
||||
// Reset to zero by every recorded success.
private int _consecutiveFailures;
|
||||
private DateTime? _lastSuccessTime;
|
||||
private DateTime? _lastFailureTime;
// Cleared by every recorded success.
|
||||
private string? _lastError;
|
||||
// Node behind _connection; the health snapshot treats non-null as "connection open".
private string? _activeProcessNode;
|
||||
// Node behind _eventConnection; the health snapshot treats non-null as "connection open".
private string? _activeEventNode;
|
||||
|
||||
// Supplies the healthy candidate nodes and records per-node failed / healthy marks.
private readonly HistorianClusterEndpointPicker _picker;
|
||||
|
||||
/// <summary>
/// Creates a data source that opens connections through a new
/// <see cref="SdkHistorianConnectionFactory"/> and a picker built from
/// <paramref name="config"/>.
/// </summary>
/// <param name="config">The historian configuration.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is <c>null</c>.</exception>
public HistorianDataSource(HistorianConfiguration config)
    : this(config, new SdkHistorianConnectionFactory(), null) { }

/// <summary>
/// Creates a data source with an explicit connection factory and, optionally, an
/// explicit cluster endpoint picker.
/// </summary>
/// <param name="config">The historian configuration.</param>
/// <param name="factory">The factory used to open SDK connections.</param>
/// <param name="picker">
/// The cluster endpoint picker; when <c>null</c> a picker is created from
/// <paramref name="config"/>.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="config"/> or <paramref name="factory"/> is <c>null</c>.
/// </exception>
internal HistorianDataSource(
    HistorianConfiguration config,
    IHistorianConnectionFactory factory,
    HistorianClusterEndpointPicker? picker = null)
{
    // Fail at construction with the offending parameter name rather than with a
    // NullReferenceException on the first read.
    _config = config ?? throw new ArgumentNullException(nameof(config));
    _factory = factory ?? throw new ArgumentNullException(nameof(factory));
    _picker = picker ?? new HistorianClusterEndpointPicker(config);
}
|
||||
|
||||
// SDK error codes that point at the connection or the server instead of the query
// payload. Only these justify closing and re-opening the (relatively expensive)
// historian connection; resetting on a query-class failure (bad tag name, unsupported
// aggregate, ...) would let a burst of bad-tag queries push an otherwise healthy
// cluster node into cooldown. See Driver.Historian.Wonderware-008.
private static readonly HashSet<HistorianAccessError.ErrorValue> ConnectionErrorCodes =
    new HashSet<HistorianAccessError.ErrorValue>
    {
        HistorianAccessError.ErrorValue.FailedToConnect,
        HistorianAccessError.ErrorValue.FailedToCreateSession,
        HistorianAccessError.ErrorValue.NoReply,
        HistorianAccessError.ErrorValue.NotReady,
        HistorianAccessError.ErrorValue.NotInitialized,
        HistorianAccessError.ErrorValue.Stopping,
        HistorianAccessError.ErrorValue.Win32Exception,
        HistorianAccessError.ErrorValue.InvalidResponse,
    };

/// <summary>
/// Tells whether an <c>aahClientManaged</c> error code is <em>connection-class</em>,
/// i.e. the shared SDK connection (not the query payload) is at fault and should be
/// reset. Internal so unit tests can call it.
/// </summary>
/// <param name="code">The historian access error code.</param>
/// <returns><c>true</c> when <paramref name="code"/> is one of the connection-class codes.</returns>
internal static bool IsConnectionClassError(HistorianAccessError.ErrorValue code)
{
    return ConnectionErrorCodes.Contains(code);
}
|
||||
|
||||
/// <summary>
/// Classifies a failed <c>StartQuery</c> from the per-timestamp at-time loop:
/// <c>true</c> means reset the shared SDK connection and abort the read,
/// <c>false</c> means record a Bad sample for that timestamp and carry on.
/// Only connection-class codes return <c>true</c>; query-class / no-data codes and a
/// missing error object return <c>false</c>, so one bad or empty timestamp does not
/// tear down a connection that still serves the others. The SDK <c>HistoryQuery</c>
/// type is non-virtual and has no interface, so this pure helper is the unit-testable
/// seam for the classification. See Driver.Historian.Wonderware-014.
/// </summary>
/// <param name="error">The SDK error returned by the failed <c>StartQuery</c>; may be <c>null</c>.</param>
internal static bool ShouldResetConnectionForStartQueryFailure(HistorianAccessError? error)
{
    // A missing error object is treated as the generic Failure code.
    var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
    return IsConnectionClassError(code);
}
|
||||
|
||||
/// <summary>
/// Creates the per-read <see cref="CancellationTokenSource"/>: linked to the caller's
/// <paramref name="ct"/> and, when
/// <see cref="HistorianConfiguration.RequestTimeoutSeconds"/> is positive, scheduled
/// to cancel itself after that many seconds. The read paths poll the resulting token
/// via <c>ThrowIfCancellationRequested</c> so a slow query cannot hold the single
/// TCP-server connection thread indefinitely. See Driver.Historian.Wonderware-010.
/// </summary>
/// <param name="cfg">The historian configuration.</param>
/// <param name="ct">The caller's cancellation token.</param>
/// <returns>A linked source the caller owns and must dispose.</returns>
internal static CancellationTokenSource BuildRequestCts(HistorianConfiguration cfg, CancellationToken ct)
{
    var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);

    // Zero or negative disables the outer timeout; only the caller's token applies.
    if (cfg.RequestTimeoutSeconds <= 0)
    {
        return linked;
    }

    linked.CancelAfter(TimeSpan.FromSeconds(cfg.RequestTimeoutSeconds));
    return linked;
}
|
||||
|
||||
/// <summary>
/// Tries each node the picker currently reports healthy, in order, and returns the
/// first connection that opens together with the node it was opened against. A node
/// that throws is marked failed on the picker and the next candidate is tried.
/// </summary>
/// <param name="type">The kind of SDK connection to open.</param>
/// <exception cref="InvalidOperationException">
/// No node is configured, all nodes are in cooldown, or every candidate failed (the
/// last failure is attached as the inner exception).
/// </exception>
private (HistorianAccess Connection, string Node) ConnectToAnyHealthyNode(HistorianConnectionType type)
{
    var healthyNodes = _picker.GetHealthyNodes();
    if (healthyNodes.Count == 0)
    {
        var configured = _picker.NodeCount;
        var reason = configured == 0
            ? "No historian nodes configured"
            : $"All {configured} historian nodes are in cooldown — no healthy endpoints to connect to";
        throw new InvalidOperationException(reason);
    }

    Exception? mostRecentFailure = null;
    foreach (var node in healthyNodes)
    {
        var nodeConfig = CloneConfigWithServerName(node);
        try
        {
            var opened = _factory.CreateAndConnect(nodeConfig, type);
            _picker.MarkHealthy(node);
            return (opened, node);
        }
        catch (Exception ex)
        {
            _picker.MarkFailed(node, ex.Message);
            mostRecentFailure = ex;
            Log.Warning(ex, "Historian node {Node} failed during connect attempt; trying next candidate", node);
        }
    }

    var detail = mostRecentFailure?.Message ?? "(no detail)";
    throw new InvalidOperationException(
        $"All {healthyNodes.Count} healthy historian candidate(s) failed during connect: {detail}",
        mostRecentFailure);
}
|
||||
|
||||
/// <summary>
/// Copies the current configuration into a new instance whose
/// <c>ServerName</c> is <paramref name="serverName"/>, for a connection attempt
/// against one specific cluster node.
/// </summary>
/// <param name="serverName">The node the copy should target.</param>
/// <returns>A new configuration; the instance held by this data source is not modified.</returns>
private HistorianConfiguration CloneConfigWithServerName(string serverName)
{
    return new HistorianConfiguration
    {
        Enabled = _config.Enabled,
        ServerName = serverName,
        ServerNames = _config.ServerNames,
        FailureCooldownSeconds = _config.FailureCooldownSeconds,
        IntegratedSecurity = _config.IntegratedSecurity,
        UserName = _config.UserName,
        Password = _config.Password,
        Port = _config.Port,
        CommandTimeoutSeconds = _config.CommandTimeoutSeconds,
        MaxValuesPerRead = _config.MaxValuesPerRead,
        // Previously omitted, which silently reset the copy to the property default.
        RequestTimeoutSeconds = _config.RequestTimeoutSeconds
    };
}
|
||||
|
||||
/// <summary>
/// Builds a point-in-time view of the query counters, the last success / failure
/// details, the active nodes and the picker's per-node states.
/// </summary>
/// <returns>A new <see cref="HistorianHealthSnapshot"/>.</returns>
public HistorianHealthSnapshot GetHealthSnapshot()
{
    var nodes = _picker.SnapshotNodeStates();

    var healthy = 0;
    foreach (var state in nodes)
    {
        if (state.IsHealthy)
        {
            healthy++;
        }
    }

    // Driver.Historian.Wonderware-005: the "connection open" flags are derived from
    // the active-node strings, which live under _healthLock, instead of from
    // _connection / _eventConnection, which are published under different locks.
    // Reading those here could yield a snapshot that is open with no node (or closed
    // with a node) at the publish / clear boundary; using the strings as the single
    // source of truth keeps the snapshot self-consistent.
    lock (_healthLock)
    {
        var processNode = _activeProcessNode;
        var eventNode = _activeEventNode;

        return new HistorianHealthSnapshot
        {
            TotalQueries = _totalSuccesses + _totalFailures,
            TotalSuccesses = _totalSuccesses,
            TotalFailures = _totalFailures,
            ConsecutiveFailures = _consecutiveFailures,
            LastSuccessTime = _lastSuccessTime,
            LastFailureTime = _lastFailureTime,
            LastError = _lastError,
            ProcessConnectionOpen = processNode != null,
            EventConnectionOpen = eventNode != null,
            ActiveProcessNode = processNode,
            ActiveEventNode = eventNode,
            NodeCount = nodes.Count,
            HealthyNodeCount = healthy,
            Nodes = nodes
        };
    }
}
|
||||
|
||||
/// <summary>
/// Counts one successful query: stamps the success time and clears the
/// consecutive-failure streak and the last error.
/// </summary>
private void RecordSuccess()
{
    lock (_healthLock)
    {
        _consecutiveFailures = 0;
        _lastError = null;
        _lastSuccessTime = DateTime.UtcNow;
        _totalSuccesses += 1;
    }
}
|
||||
|
||||
/// <summary>
/// Counts one failed query: stamps the failure time, extends the
/// consecutive-failure streak and remembers <paramref name="error"/>.
/// </summary>
/// <param name="error">Short description stored as the last error.</param>
private void RecordFailure(string error)
{
    lock (_healthLock)
    {
        _lastError = error;
        _lastFailureTime = DateTime.UtcNow;
        _consecutiveFailures += 1;
        _totalFailures += 1;
    }
}
|
||||
|
||||
/// <summary>
/// Makes sure the shared process connection exists, opening one against a healthy
/// node if needed. The connect happens outside <c>_connectionLock</c>; the result is
/// published under the lock, and a freshly opened connection is closed again if the
/// object was disposed or another connection was published in the meantime.
/// </summary>
/// <exception cref="ObjectDisposedException">The data source has been disposed.</exception>
private void EnsureConnected()
{
    if (_disposed)
    {
        throw new ObjectDisposedException(nameof(HistorianDataSource));
    }

    if (Volatile.Read(ref _connection) != null)
    {
        return;
    }

    var (candidate, node) = ConnectToAnyHealthyNode(HistorianConnectionType.Process);

    // Closes the connection we just opened when it turns out not to be needed.
    void DiscardCandidate()
    {
        candidate.CloseConnection(out _);
        candidate.Dispose();
    }

    lock (_connectionLock)
    {
        if (_disposed)
        {
            DiscardCandidate();
            throw new ObjectDisposedException(nameof(HistorianDataSource));
        }

        if (_connection != null)
        {
            // A connection was published while we were connecting; keep that one.
            DiscardCandidate();
            return;
        }

        _connection = candidate;
        lock (_healthLock)
        {
            _activeProcessNode = node;
        }

        Log.Information("Historian SDK connection opened to {Server}:{Port}", node, _config.Port);
    }
}
|
||||
|
||||
/// <summary>
/// Tears down the shared process connection after a failure: closes and disposes it
/// (best effort), clears the active process node and marks that node failed on the
/// picker. Does nothing when no connection is currently published.
/// </summary>
/// <param name="ex">The failure that triggered the reset, if any; its message becomes the node's failure reason.</param>
private void HandleConnectionError(Exception? ex = null)
{
    lock (_connectionLock)
    {
        var stale = _connection;
        if (stale == null)
        {
            return;
        }

        try
        {
            stale.CloseConnection(out _);
            stale.Dispose();
        }
        catch (Exception closeEx)
        {
            Log.Debug(closeEx, "Error disposing Historian SDK connection during error recovery");
        }

        _connection = null;

        string? node;
        lock (_healthLock)
        {
            node = _activeProcessNode;
            _activeProcessNode = null;
        }

        if (node != null)
        {
            _picker.MarkFailed(node, ex?.Message ?? "mid-query failure");
        }

        Log.Warning(ex, "Historian SDK connection reset (node={Node})", node ?? "(unknown)");
    }
}
|
||||
|
||||
/// <summary>
/// Makes sure the shared event connection exists, opening one against a healthy node
/// if needed. The connect happens outside <c>_eventConnectionLock</c>; the result is
/// published under the lock, and a freshly opened connection is closed again if the
/// object was disposed or another connection was published in the meantime.
/// </summary>
/// <exception cref="ObjectDisposedException">The data source has been disposed.</exception>
private void EnsureEventConnected()
{
    if (_disposed)
    {
        throw new ObjectDisposedException(nameof(HistorianDataSource));
    }

    if (Volatile.Read(ref _eventConnection) != null)
    {
        return;
    }

    var (candidate, node) = ConnectToAnyHealthyNode(HistorianConnectionType.Event);

    // Closes the connection we just opened when it turns out not to be needed.
    void DiscardCandidate()
    {
        candidate.CloseConnection(out _);
        candidate.Dispose();
    }

    lock (_eventConnectionLock)
    {
        if (_disposed)
        {
            DiscardCandidate();
            throw new ObjectDisposedException(nameof(HistorianDataSource));
        }

        if (_eventConnection != null)
        {
            // A connection was published while we were connecting; keep that one.
            DiscardCandidate();
            return;
        }

        _eventConnection = candidate;
        lock (_healthLock)
        {
            _activeEventNode = node;
        }

        Log.Information("Historian SDK event connection opened to {Server}:{Port}", node, _config.Port);
    }
}
|
||||
|
||||
/// <summary>
/// Thrown when <c>StartQuery</c> fails with a <em>query-class</em> SDK error (bad tag
/// name, unsupported aggregate, etc.). The read methods catch this type separately
/// and re-throw it without resetting the shared SDK connection, so the IPC frame
/// handler surfaces <c>Success=false</c> while the connection stays open.
/// See Driver.Historian.Wonderware-008.
/// </summary>
internal sealed class QueryClassStartQueryException : InvalidOperationException
{
    /// <summary>Creates the exception for the given SDK error code.</summary>
    /// <param name="message">The exception message.</param>
    /// <param name="code">The historian access error code.</param>
    public QueryClassStartQueryException(string message, HistorianAccessError.ErrorValue code)
        : base(message) => Code = code;

    /// <summary>Gets the SDK error code reported by the failed <c>StartQuery</c>.</summary>
    public HistorianAccessError.ErrorValue Code { get; }
}
|
||||
|
||||
/// <summary>
/// Centralised <c>StartQuery</c>-failure handler. Always throws, so the caller
/// surfaces <c>Success=false</c> in the IPC reply instead of an empty result that
/// would look like "no data in range".
/// <para>
/// Connection-class codes reset the affected connection (process or event) and
/// throw a plain <see cref="InvalidOperationException"/>. Query-class codes (bad
/// tag name, unsupported aggregate, etc.) record the failure and throw
/// <see cref="QueryClassStartQueryException"/>, leaving the shared SDK connection
/// intact — otherwise a burst of bad-tag queries would cycle the connection and push
/// a healthy cluster node into cooldown. See Driver.Historian.Wonderware-008.
/// </para>
/// </summary>
/// <param name="operation">Human-readable description of the query, used in logs and messages.</param>
/// <param name="error">The SDK error from <c>StartQuery</c>; a null value is treated as the generic <c>Failure</c> code.</param>
/// <param name="isEventConnection"><c>true</c> to reset the event connection, <c>false</c> for the process connection.</param>
/// <exception cref="QueryClassStartQueryException">The error code is query-class.</exception>
/// <exception cref="InvalidOperationException">The error code is connection-class.</exception>
private void HandleStartQueryFailure(
    string operation, HistorianAccessError error, bool isEventConnection)
{
    var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
    var description = error?.ErrorDescription ?? string.Empty;
    var connectionClass = IsConnectionClassError(code);

    Log.Warning(
        "Historian SDK StartQuery failed: {Operation} -> {Code} ({Desc}) [{Kind}]",
        operation, code, description,
        connectionClass ? "connection-class" : "query-class");

    var message = $"Historian SDK StartQuery failed for {operation}: {code} ({description})";

    if (connectionClass)
    {
        // Do not RecordFailure here. The exception below is a plain
        // InvalidOperationException, so each read method's catch-all handler records
        // it; recording here as well counted every connection-class failure twice.
        var failure = new InvalidOperationException(message);

        // Passing the exception marks the node failed with the SDK error text rather
        // than the generic "mid-query failure" reason.
        if (isEventConnection) HandleEventConnectionError(failure);
        else HandleConnectionError(failure);

        throw failure;
    }

    // Query-class: the read methods re-throw this type without touching the
    // connection or the counters, so this is the only place the failure is recorded.
    RecordFailure($"{operation}: {code}");
    throw new QueryClassStartQueryException(message, code);
}
|
||||
|
||||
/// <summary>
/// Tears down the shared event connection after a failure: closes and disposes it
/// (best effort), clears the active event node and marks that node failed on the
/// picker. Does nothing when no event connection is currently published.
/// </summary>
/// <param name="ex">The failure that triggered the reset, if any; its message becomes the node's failure reason.</param>
private void HandleEventConnectionError(Exception? ex = null)
{
    lock (_eventConnectionLock)
    {
        var stale = _eventConnection;
        if (stale == null)
        {
            return;
        }

        try
        {
            stale.CloseConnection(out _);
            stale.Dispose();
        }
        catch (Exception closeEx)
        {
            Log.Debug(closeEx, "Error disposing Historian SDK event connection during error recovery");
        }

        _eventConnection = null;

        string? node;
        lock (_healthLock)
        {
            node = _activeEventNode;
            _activeEventNode = null;
        }

        if (node != null)
        {
            _picker.MarkFailed(node, ex?.Message ?? "mid-query failure");
        }

        Log.Warning(ex, "Historian SDK event connection reset (node={Node})", node ?? "(unknown)");
    }
}
|
||||
|
||||
/// <summary>
/// Reads raw (<c>Full</c> retrieval mode) samples for one tag over a time range. The
/// work runs synchronously; the returned task is already completed.
/// </summary>
/// <param name="tagName">The tag name.</param>
/// <param name="startTime">The start time for the query.</param>
/// <param name="endTime">The end time for the query.</param>
/// <param name="maxValues">
/// Maximum number of samples to return; when not positive, the configured
/// <c>MaxValuesPerRead</c> is used instead (no cap if that is not positive either).
/// </param>
/// <param name="ct">Cancellation token for the operation.</param>
/// <returns>The samples read, in the order the SDK returned them.</returns>
public Task<List<HistorianSample>> ReadRawAsync(
    string tagName, DateTime startTime, DateTime endTime, int maxValues,
    CancellationToken ct = default)
{
    var samples = new List<HistorianSample>();

    // Driver.Historian.Wonderware-010: RequestTimeoutSeconds is wired into the read
    // path so a slow query cannot hold the TCP connection thread forever.
    using var requestCts = BuildRequestCts(_config, ct);
    var requestToken = requestCts.Token;

    try
    {
        EnsureConnected();

        using var query = _connection!.CreateHistoryQuery();
        var queryArgs = new HistoryQueryArgs
        {
            TagNames = new StringCollection { tagName },
            StartDateTime = startTime,
            EndDateTime = endTime,
            RetrievalMode = HistorianRetrievalMode.Full
        };

        // One cap drives both the SDK batch size and the client-side loop limit.
        var cap = maxValues > 0 ? maxValues : _config.MaxValuesPerRead;
        if (cap > 0)
        {
            queryArgs.BatchSize = (uint)cap;
        }

        if (!query.StartQuery(queryArgs, out var error))
        {
            HandleStartQueryFailure(
                $"raw query for tag '{tagName}'", error, isEventConnection: false);
        }

        while (query.MoveNext(out error))
        {
            requestToken.ThrowIfCancellationRequested();

            var row = query.QueryResult;
            samples.Add(new HistorianSample
            {
                Value = SelectValue(row),
                TimestampUtc = DateTime.SpecifyKind(row.StartDateTime, DateTimeKind.Utc),
                Quality = (byte)(row.OpcQuality & 0xFF),
            });

            if (cap > 0 && samples.Count >= cap)
            {
                break;
            }
        }

        query.EndQuery(out _);
        RecordSuccess();
    }
    catch (Exception ex) when (ex is OperationCanceledException
                               || ex is ObjectDisposedException
                               || ex is QueryClassStartQueryException)
    {
        // Cancellation and disposal are not connection faults. A query-class
        // StartQuery failure was already logged and recorded by
        // HandleStartQueryFailure and must leave the connection alone
        // (Driver.Historian.Wonderware-008). All three propagate unchanged so the IPC
        // layer reports Success=false rather than an empty "no data" list.
        throw;
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "HistoryRead raw failed for {Tag}", tagName);
        RecordFailure($"raw: {ex.Message}");
        HandleConnectionError(ex);
        throw;
    }

    Log.Debug("HistoryRead raw: {Tag} returned {Count} values ({Start} to {End})",
        tagName, samples.Count, startTime, endTime);

    return Task.FromResult(samples);
}
|
||||
|
||||
/// <summary>
/// Reads analog-summary buckets for one tag over a time range and projects the
/// requested aggregate column out of each bucket. The work runs synchronously; the
/// returned task is already completed.
/// </summary>
/// <param name="tagName">The tag name.</param>
/// <param name="startTime">The start time for the query.</param>
/// <param name="endTime">The end time for the query.</param>
/// <param name="intervalMs">The bucket resolution in milliseconds.</param>
/// <param name="aggregateColumn">The aggregate column name passed to <see cref="ExtractAggregateValue"/>.</param>
/// <param name="ct">Cancellation token for the operation.</param>
/// <returns>One sample per bucket, truncated at <c>MaxValuesPerRead</c> when that is positive.</returns>
public Task<List<HistorianAggregateSample>> ReadAggregateAsync(
    string tagName, DateTime startTime, DateTime endTime,
    double intervalMs, string aggregateColumn,
    CancellationToken ct = default)
{
    var buckets = new List<HistorianAggregateSample>();

    // Driver.Historian.Wonderware-010: outer safety timeout, same as the raw read.
    using var requestCts = BuildRequestCts(_config, ct);
    var requestToken = requestCts.Token;

    try
    {
        EnsureConnected();

        using var query = _connection!.CreateAnalogSummaryQuery();
        var queryArgs = new AnalogSummaryQueryArgs
        {
            TagNames = new StringCollection { tagName },
            StartDateTime = startTime,
            EndDateTime = endTime,
            Resolution = (ulong)intervalMs
        };

        if (!query.StartQuery(queryArgs, out var error))
        {
            HandleStartQueryFailure(
                $"aggregate query for tag '{tagName}'", error, isEventConnection: false);
        }

        // Same bucket cap as the raw-read path, so a wide time range with a small
        // IntervalMs cannot produce an unbounded result set that would overflow the
        // 16 MiB FrameWriter frame cap and lose the entire reply.
        var cap = _config.MaxValuesPerRead;

        while (query.MoveNext(out error))
        {
            requestToken.ThrowIfCancellationRequested();

            var row = query.QueryResult;
            buckets.Add(new HistorianAggregateSample
            {
                Value = ExtractAggregateValue(row, aggregateColumn),
                TimestampUtc = DateTime.SpecifyKind(row.StartDateTime, DateTimeKind.Utc),
            });

            if (cap <= 0 || buckets.Count < cap)
            {
                continue;
            }

            Log.Warning(
                "HistoryRead aggregate ({Aggregate}): {Tag} truncated at {Limit} buckets — widen IntervalMs or reduce time range",
                aggregateColumn, tagName, cap);
            break;
        }

        query.EndQuery(out _);
        RecordSuccess();
    }
    catch (Exception ex) when (ex is OperationCanceledException
                               || ex is ObjectDisposedException
                               || ex is QueryClassStartQueryException)
    {
        // Not connection faults: propagate unchanged and keep the connection.
        throw;
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "HistoryRead aggregate failed for {Tag}", tagName);
        RecordFailure($"aggregate: {ex.Message}");
        HandleConnectionError(ex);
        throw;
    }

    Log.Debug("HistoryRead aggregate ({Aggregate}): {Tag} returned {Count} values",
        aggregateColumn, tagName, buckets.Count);

    return Task.FromResult(buckets);
}
|
||||
|
||||
/// <summary>
/// Reads one interpolated sample per requested timestamp for a tag, issuing a
/// separate single-row query for each timestamp. The work runs synchronously; the
/// returned task is already completed.
/// </summary>
/// <param name="tagName">The tag name.</param>
/// <param name="timestamps">The timestamps to read; <c>null</c> or empty yields an empty list.</param>
/// <param name="ct">Cancellation token for the operation.</param>
/// <returns>
/// One sample per timestamp, in input order. A timestamp whose query fails with a
/// non-connection error, or returns no row, gets a null value with quality 0.
/// </returns>
public Task<List<HistorianSample>> ReadAtTimeAsync(
    string tagName, DateTime[] timestamps,
    CancellationToken ct = default)
{
    var samples = new List<HistorianSample>();

    if (timestamps is null || timestamps.Length == 0)
    {
        return Task.FromResult(samples);
    }

    // Driver.Historian.Wonderware-010: outer safety timeout, same as the raw read.
    using var requestCts = BuildRequestCts(_config, ct);
    var requestToken = requestCts.Token;

    // Placeholder for a timestamp that produced no usable row (quality 0 = Bad).
    HistorianSample BadSampleAt(DateTime at) => new HistorianSample
    {
        Value = null,
        TimestampUtc = DateTime.SpecifyKind(at, DateTimeKind.Utc),
        Quality = 0,
    };

    try
    {
        EnsureConnected();

        for (var i = 0; i < timestamps.Length; i++)
        {
            requestToken.ThrowIfCancellationRequested();

            var at = timestamps[i];
            using var query = _connection!.CreateHistoryQuery();
            var queryArgs = new HistoryQueryArgs
            {
                TagNames = new StringCollection { tagName },
                StartDateTime = at,
                EndDateTime = at,
                RetrievalMode = HistorianRetrievalMode.Interpolated,
                BatchSize = 1
            };

            if (!query.StartQuery(queryArgs, out var error))
            {
                // Driver.Historian.Wonderware-014: a connection-class code means the
                // shared connection is dead, so abort the whole read; the catch-all
                // below resets the connection and marks the node failed. Otherwise
                // every remaining timestamp would re-fail on the dead connection and
                // the call would still succeed with an all-Bad result, never failing
                // over. Any other code costs only this timestamp.
                if (ShouldResetConnectionForStartQueryFailure(error))
                {
                    var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
                    throw new InvalidOperationException(
                        $"Historian SDK StartQuery failed for at-time query of tag '{tagName}': {code} ({error?.ErrorDescription})");
                }

                samples.Add(BadSampleAt(at));
                continue;
            }

            if (query.MoveNext(out error))
            {
                var row = query.QueryResult;
                samples.Add(new HistorianSample
                {
                    Value = SelectValue(row),
                    TimestampUtc = DateTime.SpecifyKind(at, DateTimeKind.Utc),
                    Quality = (byte)(row.OpcQuality & 0xFF),
                });
            }
            else
            {
                samples.Add(BadSampleAt(at));
            }

            query.EndQuery(out _);
        }

        RecordSuccess();
    }
    catch (Exception ex) when (ex is OperationCanceledException || ex is ObjectDisposedException)
    {
        // Not connection faults: propagate unchanged and keep the connection.
        throw;
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "HistoryRead at-time failed for {Tag}", tagName);
        RecordFailure($"at-time: {ex.Message}");
        HandleConnectionError(ex);
        throw;
    }

    Log.Debug("HistoryRead at-time: {Tag} returned {Count} values for {Timestamps} timestamps",
        tagName, samples.Count, timestamps.Length);

    return Task.FromResult(samples);
}
|
||||
|
||||
/// <summary>
/// Reads historian events in ascending order over a time range, optionally filtered
/// to a single source, using the dedicated event connection. The work runs
/// synchronously; the returned task is already completed.
/// </summary>
/// <param name="sourceName">Event source to filter on; <c>null</c> or empty reads all sources.</param>
/// <param name="startTime">The start time for the query.</param>
/// <param name="endTime">The end time for the query.</param>
/// <param name="maxEvents">
/// Maximum number of events to return; when not positive, the configured
/// <c>MaxValuesPerRead</c> is sent to the SDK as the event count instead.
/// </param>
/// <param name="ct">Cancellation token for the operation.</param>
/// <returns>The events read, converted to SDK-free DTOs.</returns>
public Task<List<HistorianEventDto>> ReadEventsAsync(
    string? sourceName, DateTime startTime, DateTime endTime, int maxEvents,
    CancellationToken ct = default)
{
    var events = new List<HistorianEventDto>();
    var sourceLabel = sourceName ?? "(all)";

    // Driver.Historian.Wonderware-010: outer safety timeout, same as the raw read.
    using var requestCts = BuildRequestCts(_config, ct);
    var requestToken = requestCts.Token;

    try
    {
        EnsureEventConnected();

        using var query = _eventConnection!.CreateEventQuery();
        var requestedCount = maxEvents > 0 ? (uint)maxEvents : (uint)_config.MaxValuesPerRead;
        var queryArgs = new EventQueryArgs
        {
            StartDateTime = startTime,
            EndDateTime = endTime,
            EventCount = requestedCount,
            QueryType = HistorianEventQueryType.Events,
            EventOrder = HistorianEventOrder.Ascending
        };

        if (!string.IsNullOrEmpty(sourceName))
        {
            query.AddEventFilter("Source", HistorianComparisionType.Equal, sourceName, out _);
        }

        if (!query.StartQuery(queryArgs, out var error))
        {
            HandleStartQueryFailure(
                $"event query for source '{sourceLabel}'", error, isEventConnection: true);
        }

        while (query.MoveNext(out error))
        {
            requestToken.ThrowIfCancellationRequested();
            events.Add(ToDto(query.QueryResult));

            if (maxEvents > 0 && events.Count >= maxEvents)
            {
                break;
            }
        }

        query.EndQuery(out _);
        RecordSuccess();
    }
    catch (Exception ex) when (ex is OperationCanceledException
                               || ex is ObjectDisposedException
                               || ex is QueryClassStartQueryException)
    {
        // Not connection faults: propagate unchanged and keep the connection.
        throw;
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "HistoryRead events failed for source {Source}", sourceLabel);
        RecordFailure($"events: {ex.Message}");
        HandleEventConnectionError(ex);
        throw;
    }

    Log.Debug("HistoryRead events: source={Source} returned {Count} events ({Start} to {End})",
        sourceLabel, events.Count, startTime, endTime);

    return Task.FromResult(events);
}
|
||||
|
||||
/// <summary>Copies an SDK event record into the SDK-free <see cref="HistorianEventDto"/>.</summary>
/// <param name="evt">The SDK event to convert.</param>
private static HistorianEventDto ToDto(HistorianEvent evt)
{
    // The ArchestrA SDK marks these properties obsolete but still returns them, and
    // their successors are not wired in the SDK version this project binds against.
    // Reading them is the documented v1 behavior. The warning is suppressed here only,
    // not project-wide, so any other use of deprecated SDK surface still errors.
#pragma warning disable CS0618
    var dto = new HistorianEventDto();
    dto.Id = evt.Id;
    dto.Source = evt.Source;
    dto.EventTime = evt.EventTime;
    dto.ReceivedTime = evt.ReceivedTime;
    dto.DisplayText = evt.DisplayText;
    dto.Severity = (ushort)evt.Severity;
    return dto;
#pragma warning restore CS0618
}
|
||||
|
||||
/// <summary>
/// Picks the typed value out of a <see cref="HistoryQueryResult"/> row.
/// <para>
/// <b>SDK limitation:</b> <c>HistoryQueryResult</c> exposes only <c>Value</c>
/// (double) and <c>StringValue</c> (string); the bound version of
/// <c>aahClientManaged</c> does not surface the tag's declared data type per row,
/// so branching on it is not possible. The heuristic used instead: return
/// <c>StringValue</c> only when it is non-empty AND <c>Value</c> is zero, because
/// string tags always project to <c>Value=0</c>, while numeric tags that sample to
/// zero normally leave <c>StringValue</c> unpopulated. Known edge case: a numeric
/// tag at exactly zero that carries a formatted <c>StringValue</c> (e.g. "0.00")
/// is reported as a string.
/// </para>
/// </summary>
/// <param name="result">The history query result.</param>
/// <returns>The row's string value or its numeric value, per the heuristic above.</returns>
internal static object? SelectValue(HistoryQueryResult result)
{
    return SelectValueFromPair(result.Value, result.StringValue);
}
|
||||
|
||||
/// <summary>
/// SDK-independent form of the string-vs-numeric heuristic, so unit tests can pin
/// the logic without instantiating the SDK <see cref="HistoryQueryResult"/> (whose
/// internal property initialisers make it impractical to fake).
/// See Driver.Historian.Wonderware-012.
/// </summary>
/// <param name="value">The numeric value.</param>
/// <param name="stringValue">The string value.</param>
/// <returns>
/// <paramref name="stringValue"/> when it is non-empty and <paramref name="value"/>
/// equals zero; otherwise <paramref name="value"/> boxed as a double.
/// </returns>
internal static object? SelectValueFromPair(double value, string? stringValue)
{
    var looksLikeStringTag = value == 0 && !string.IsNullOrEmpty(stringValue);
    return looksLikeStringTag ? (object?)stringValue : value;
}
|
||||
|
||||
/// <summary>
/// Returns the aggregate named by <paramref name="column"/> from an analog summary
/// row. The name match is exact and case-sensitive.
/// </summary>
/// <param name="result">The analog summary query result.</param>
/// <param name="column">
/// One of <c>Average</c>, <c>Minimum</c>, <c>Maximum</c>, <c>ValueCount</c>,
/// <c>First</c>, <c>Last</c> or <c>StdDev</c>.
/// </param>
/// <returns>The selected aggregate, or <c>null</c> for any other column name.</returns>
internal static double? ExtractAggregateValue(AnalogSummaryQueryResult result, string column)
{
    return column switch
    {
        "Average" => result.Average,
        "Minimum" => result.Minimum,
        "Maximum" => result.Maximum,
        "ValueCount" => result.ValueCount,
        "First" => result.First,
        "Last" => result.Last,
        "StdDev" => result.StdDev,
        _ => (double?)null,
    };
}
|
||||
|
||||
/// <summary>
/// Disposes the historian data source and releases its resources. Safe to call more
/// than once: calls after the first return immediately.
/// </summary>
/// <remarks>
/// Each connection is closed and then disposed. <c>Dispose</c> runs even when
/// <c>CloseConnection</c> throws, so a failing close cannot leak the SDK handle.
/// Failures are logged and never propagate out of this method.
/// </remarks>
public void Dispose()
{
    if (_disposed) return;
    _disposed = true;

    try
    {
        try
        {
            _connection?.CloseConnection(out _);
        }
        finally
        {
            // Always release the handle, even if the close call itself failed.
            _connection?.Dispose();
        }
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "Error closing Historian SDK connection");
    }

    try
    {
        try
        {
            _eventConnection?.CloseConnection(out _);
        }
        finally
        {
            _eventConnection?.Dispose();
        }
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "Error closing Historian SDK event connection");
    }

    _connection = null;
    _eventConnection = null;
}
|
||||
}
|
||||
}
|
||||
-29
@@ -1,29 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Plain, SDK-free carrier for one Historian event record. Exists so that ArchestrA
/// types do not leak beyond <c>HistorianDataSource</c>.
/// </summary>
public sealed class HistorianEventDto
{
    /// <summary>Gets or sets the event's unique identifier.</summary>
    public Guid Id { get; set; }

    /// <summary>Gets or sets the event source; may be <c>null</c>.</summary>
    public string? Source { get; set; }

    /// <summary>Gets or sets when the event occurred.</summary>
    public DateTime EventTime { get; set; }

    /// <summary>Gets or sets when the event was received.</summary>
    public DateTime ReceivedTime { get; set; }

    /// <summary>Gets or sets the event's display text; may be <c>null</c>.</summary>
    public string? DisplayText { get; set; }

    /// <summary>Gets or sets the event's severity level.</summary>
    public ushort Severity { get; set; }
}
|
||||
}
|
||||
-41
@@ -1,41 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Snapshot of the historian subsystem's runtime health at one point in time. Intended
/// for the status dashboard via an IPC health query (not wired in PR #5; deferred).
/// </summary>
public sealed class HistorianHealthSnapshot
{
    /// <summary>Gets or sets how many queries have been executed in total.</summary>
    public long TotalQueries { get; set; }

    /// <summary>Gets or sets how many queries succeeded in total.</summary>
    public long TotalSuccesses { get; set; }

    /// <summary>Gets or sets how many queries failed in total.</summary>
    public long TotalFailures { get; set; }

    /// <summary>Gets or sets the current run of consecutive failures.</summary>
    public int ConsecutiveFailures { get; set; }

    /// <summary>Gets or sets when the last query succeeded, or <c>null</c> if none has.</summary>
    public DateTime? LastSuccessTime { get; set; }

    /// <summary>Gets or sets when the last query failed, or <c>null</c> if none has.</summary>
    public DateTime? LastFailureTime { get; set; }

    /// <summary>Gets or sets the most recent error message, if any.</summary>
    public string? LastError { get; set; }

    /// <summary>Gets or sets whether the process connection is open.</summary>
    public bool ProcessConnectionOpen { get; set; }

    /// <summary>Gets or sets whether the event connection is open.</summary>
    public bool EventConnectionOpen { get; set; }

    /// <summary>Gets or sets the name of the node serving the process connection.</summary>
    public string? ActiveProcessNode { get; set; }

    /// <summary>Gets or sets the name of the node serving the event connection.</summary>
    public string? ActiveEventNode { get; set; }

    /// <summary>Gets or sets the total number of cluster nodes.</summary>
    public int NodeCount { get; set; }

    /// <summary>Gets or sets how many cluster nodes are healthy.</summary>
    public int HealthyNodeCount { get; set; }

    /// <summary>Gets or sets the per-node cluster states; initialised to an empty list.</summary>
    public List<HistorianClusterNodeState> Nodes { get; set; } = new List<HistorianClusterNodeState>();
}
|
||||
}
|
||||
-48
@@ -1,48 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend;
|
||||
|
||||
/// <summary>
/// Maps a raw OPC DA quality byte (as returned by Wonderware Historian's <c>OpcQuality</c>)
/// to an OPC UA <c>StatusCode</c> uint. Preserves specific codes (BadNotConnected,
/// UncertainSubNormal, etc.) instead of collapsing to Good/Uncertain/Bad categories.
/// Mirrors v1 <c>QualityMapper.MapToOpcUaStatusCode</c> without pulling in OPC UA types —
/// the returned value is the 32-bit OPC UA <c>StatusCode</c> wire encoding that the Proxy
/// surfaces directly as <c>DataValueSnapshot.StatusCode</c>.
/// </summary>
public static class HistorianQualityMapper
{
    /// <summary>
    /// Map an 8-bit OPC DA quality byte to the corresponding OPC UA StatusCode. Known
    /// byte values are matched exactly and yield their specific code. Any other value
    /// falls back to the bare category for its range: Good for 192 and above, Uncertain
    /// for 64-191, Bad for 0-63.
    /// </summary>
    /// <param name="q">The OPC DA quality byte.</param>
    /// <returns>The corresponding OPC UA status code.</returns>
    public static uint Map(byte q) => q switch
    {
        // Good family (192+)
        192 => 0x00000000u, // Good
        // GoodLocalOverride is 0x00960000 in the OPC UA status-code table. The previous
        // value 0x00D80000 was the DA byte (0xD8) copied into the sub-code, which is
        // not a defined OPC UA Good code.
        216 => 0x00960000u, // Good_LocalOverride

        // Uncertain family (64-191)
        64 => 0x40000000u, // Uncertain
        68 => 0x40900000u, // Uncertain_LastUsableValue
        80 => 0x40930000u, // Uncertain_SensorNotAccurate
        84 => 0x40940000u, // Uncertain_EngineeringUnitsExceeded
        88 => 0x40950000u, // Uncertain_SubNormal

        // Bad family (0-63)
        0 => 0x80000000u, // Bad
        4 => 0x80890000u, // Bad_ConfigurationError
        8 => 0x808A0000u, // Bad_NotConnected
        12 => 0x808B0000u, // Bad_DeviceFailure
        16 => 0x808C0000u, // Bad_SensorFailure
        20 => 0x80050000u, // Bad_CommunicationError
        24 => 0x808D0000u, // Bad_OutOfService
        32 => 0x80320000u, // Bad_WaitingForInitialData

        // Unknown code — fall back to the category so callers still get a sensible bucket.
        _ when q >= 192 => 0x00000000u,
        _ when q >= 64 => 0x40000000u,
        _ => 0x80000000u,
    };
}
|
||||
@@ -1,35 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// One historical data point, expressed without any OPC UA types. The sidecar
/// serialises these onto the TCP wire (<c>HistorianSampleDto</c>) for the .NET 10
/// <c>WonderwareHistorianClient</c>, which converts quality and value into an OPC UA
/// <c>DataValue</c> on its side. The raw OPC DA quality byte is carried through
/// unchanged so the client can reuse the quality mapper it already uses for live reads.
/// </summary>
public sealed class HistorianSample
{
    /// <summary>Gets or sets the sampled value; may be <c>null</c>.</summary>
    public object? Value { get; set; }

    /// <summary>Gets or sets the raw OPC DA quality byte from the historian SDK (low 8 bits of OpcQuality).</summary>
    public byte Quality { get; set; }

    /// <summary>Gets or sets the sample's UTC timestamp.</summary>
    public DateTime TimestampUtc { get; set; }
}
|
||||
|
||||
/// <summary>
/// One bucket returned by <see cref="IHistorianDataSource.ReadAggregateAsync"/>. A null
/// <see cref="Value"/> means the aggregate is unavailable for that bucket — the client
/// maps that to <c>BadNoData</c>.
/// </summary>
public sealed class HistorianAggregateSample
{
    /// <summary>Gets or sets the aggregate value; <c>null</c> when unavailable.</summary>
    public double? Value { get; set; }

    /// <summary>Gets or sets the aggregate sample's UTC timestamp.</summary>
    public DateTime TimestampUtc { get; set; }
}
|
||||
}
|
||||
-32
@@ -1,32 +0,0 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Abstraction over the aahClientManaged-bound alarm writer. It exists so unit tests
/// can swap in a fake without touching the SDK; the production implementation is
/// <see cref="SdkAlarmHistorianWriteBackend"/>.
/// </summary>
/// <remarks>
/// Connection management and cluster failover are the implementation's job. The
/// wrapping <see cref="AahClientManagedAlarmEventWriter"/> orchestrates at batch level
/// and delegates the per-event SDK call to this interface, which lets the unit tests
/// exercise every documented MxStatus outcome without an installed AVEVA Historian.
/// </remarks>
public interface IAlarmHistorianWriteBackend
{
    /// <summary>
    /// Persists the given events to the historian and reports one outcome per input
    /// slot, in input order. The returned array must always have the same length as
    /// <paramref name="events"/>.
    /// </summary>
    /// <param name="events">The events to write to the historian.</param>
    /// <param name="cancellationToken">Token to cancel the operation.</param>
    /// <returns>A task producing the per-event outcomes.</returns>
    Task<AlarmHistorianWriteOutcome[]> WriteBatchAsync(
        AlarmHistorianEventDto[] events,
        CancellationToken cancellationToken);
}
|
||||
}
|
||||
-105
@@ -1,105 +0,0 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using ArchestrA;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// Factory that creates and opens Historian SDK connections. Pulled out behind an
/// interface so tests can inject fakes that control connection success, failure, and
/// timeout behavior.
/// </summary>
internal interface IHistorianConnectionFactory
{
    /// <summary>
    /// Opens a Historian SDK connection. <paramref name="readOnly"/> defaults to
    /// <c>true</c>, which suits the query path. The alarm-event write backend passes
    /// <c>false</c>, because <c>HistorianAccess.AddStreamedValue</c> fails with
    /// <c>WriteToReadOnlyFile</c> on a read-only session.
    /// </summary>
    /// <param name="config">The historian configuration.</param>
    /// <param name="type">The type of connection to create.</param>
    /// <param name="readOnly">Whether the connection should be read-only.</param>
    /// <returns>An open HistorianAccess connection.</returns>
    HistorianAccess CreateAndConnect(
        HistorianConfiguration config,
        HistorianConnectionType type,
        bool readOnly = true);
}
|
||||
|
||||
/// <summary>Production implementation — opens real Historian SDK connections.</summary>
internal sealed class SdkHistorianConnectionFactory : IHistorianConnectionFactory
{
    // Delay between connection-status polls while waiting for the server to report connected.
    private const int StatusPollIntervalMs = 250;

    /// <summary>
    /// Creates a Historian SDK connection, opens it, and polls the connection status
    /// until the server reports connected, an error is reported, or the configured
    /// command timeout elapses.
    /// </summary>
    /// <param name="config">The historian configuration.</param>
    /// <param name="type">The type of connection to create.</param>
    /// <param name="readOnly">Whether the connection should be read-only.</param>
    /// <returns>An open HistorianAccess connection.</returns>
    /// <exception cref="InvalidOperationException">
    /// The SDK refused to open the connection, or reported an error while connecting.
    /// </exception>
    /// <exception cref="TimeoutException">
    /// The server did not report connected within <c>CommandTimeoutSeconds</c>.
    /// </exception>
    public HistorianAccess CreateAndConnect(
        HistorianConfiguration config, HistorianConnectionType type, bool readOnly = true)
    {
        var conn = new HistorianAccess();
        try
        {
            var args = BuildConnectionArgs(config, type, readOnly);

            if (!conn.OpenConnection(args, out var error))
            {
                throw new InvalidOperationException(
                    $"Failed to open Historian SDK connection to {config.ServerName}:{config.Port}: {error?.ErrorCode}");
            }

            WaitForServerConnection(conn, config);
            return conn;
        }
        catch
        {
            // Every failure path releases the SDK handle before rethrowing. This includes
            // unexpected throws from the SDK calls, which previously leaked the connection.
            conn.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Polls the connection status until the server reports connected. Only the sleep
    /// intervals count toward the timeout, not time spent inside the status call.
    /// </summary>
    /// <param name="conn">The connection whose status is polled.</param>
    /// <param name="config">Supplies the timeout and the endpoint named in error messages.</param>
    private static void WaitForServerConnection(HistorianAccess conn, HistorianConfiguration config)
    {
        var timeoutMs = config.CommandTimeoutSeconds * 1000;

        for (var elapsed = 0; elapsed < timeoutMs; elapsed += StatusPollIntervalMs)
        {
            var status = new HistorianConnectionStatus();
            conn.GetConnectionStatus(ref status);

            if (status.ConnectedToServer)
            {
                return;
            }

            if (status.ErrorOccurred)
            {
                throw new InvalidOperationException(
                    $"Historian SDK connection failed: {status.Error}");
            }

            Thread.Sleep(StatusPollIntervalMs);
        }

        throw new TimeoutException(
            $"Historian SDK connection to {config.ServerName}:{config.Port} timed out after {config.CommandTimeoutSeconds}s");
    }

    /// <summary>
    /// Builds the <see cref="HistorianConnectionArgs"/> for a connection. Pure (no SDK
    /// side effects) so the read-only-vs-write argument shaping is unit-testable.
    /// </summary>
    /// <param name="config">The historian configuration.</param>
    /// <param name="type">The type of connection to create.</param>
    /// <param name="readOnly">Whether the connection should be read-only.</param>
    /// <returns>The configured connection arguments.</returns>
    internal static HistorianConnectionArgs BuildConnectionArgs(
        HistorianConfiguration config, HistorianConnectionType type, bool readOnly)
    {
        var args = new HistorianConnectionArgs
        {
            ServerName = config.ServerName,
            // NOTE(review): these casts do not range-check; a port or timeout outside
            // the target type's range is not rejected here — confirm the configuration
            // is validated upstream.
            TcpPort = (ushort)config.Port,
            IntegratedSecurity = config.IntegratedSecurity,
            UseArchestrAUser = config.IntegratedSecurity,
            ConnectionType = type,
            ReadOnly = readOnly,
            PacketTimeout = (uint)(config.CommandTimeoutSeconds * 1000)
        };

        // Explicit credentials are only supplied when integrated security is off.
        if (!config.IntegratedSecurity)
        {
            args.UserName = config.UserName ?? string.Empty;
            args.Password = config.Password ?? string.Empty;
        }

        return args;
    }
}
|
||||
}
|
||||
-65
@@ -1,65 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
/// OPC-UA-free surface of the Wonderware Historian subsystem inside the historian
/// sidecar process. Implementations read via the aahClient* SDK; the .NET 10
/// <c>WonderwareHistorianClient</c> on the other side of the TCP IPC maps the
/// returned samples to OPC UA <c>DataValue</c>. The v1 Galaxy.Host / Proxy hosts that
/// previously contained this surface were retired in PR 7.2.
/// </summary>
public interface IHistorianDataSource : IDisposable
{
    /// <summary>Asynchronously reads raw historical samples for a tag.</summary>
    /// <param name="tagName">The tag name to read from.</param>
    /// <param name="startTime">The start of the time range.</param>
    /// <param name="endTime">The end of the time range.</param>
    /// <param name="maxValues">The maximum number of values to return.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>A task producing the list of historian samples.</returns>
    Task<List<HistorianSample>> ReadRawAsync(
        string tagName,
        DateTime startTime,
        DateTime endTime,
        int maxValues,
        CancellationToken ct = default);

    /// <summary>Asynchronously reads aggregated historical samples for a tag.</summary>
    /// <param name="tagName">The tag name to read from.</param>
    /// <param name="startTime">The start of the time range.</param>
    /// <param name="endTime">The end of the time range.</param>
    /// <param name="intervalMs">The aggregation interval in milliseconds.</param>
    /// <param name="aggregateColumn">The column to aggregate.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>A task producing the list of aggregate samples.</returns>
    Task<List<HistorianAggregateSample>> ReadAggregateAsync(
        string tagName,
        DateTime startTime,
        DateTime endTime,
        double intervalMs,
        string aggregateColumn,
        CancellationToken ct = default);

    /// <summary>Asynchronously reads historical samples for a tag at specific times.</summary>
    /// <param name="tagName">The tag name to read from.</param>
    /// <param name="timestamps">The timestamps at which to read values.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>A task producing the list of historian samples.</returns>
    Task<List<HistorianSample>> ReadAtTimeAsync(
        string tagName,
        DateTime[] timestamps,
        CancellationToken ct = default);

    /// <summary>Asynchronously reads historical events.</summary>
    /// <param name="sourceName">The source name to filter events by, or null for all sources.</param>
    /// <param name="startTime">The start of the time range.</param>
    /// <param name="endTime">The end of the time range.</param>
    /// <param name="maxEvents">The maximum number of events to return.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>A task producing the list of historian events.</returns>
    Task<List<HistorianEventDto>> ReadEventsAsync(
        string? sourceName,
        DateTime startTime,
        DateTime endTime,
        int maxEvents,
        CancellationToken ct = default);

    /// <summary>Gets a health snapshot of the data source.</summary>
    /// <returns>A <see cref="HistorianHealthSnapshot"/> with the current health information.</returns>
    HistorianHealthSnapshot GetHealthSnapshot();
}
|
||||
}
|
||||
-398
@@ -1,398 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ArchestrA;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
|
||||
/// Production <see cref="IAlarmHistorianWriteBackend"/> backed by AVEVA Historian's
|
||||
/// <c>aahClientManaged</c> SDK. Each <see cref="AlarmHistorianEventDto"/> is written via
|
||||
/// <c>HistorianAccess.AddStreamedValue(HistorianEvent, out HistorianAccessError)</c> —
|
||||
/// the alarm-event write entry point pinned during PR C.1.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The write path needs its <b>own</b> connection. The query-side
|
||||
/// <see cref="HistorianDataSource"/> opens <c>ReadOnly</c> sessions, and
|
||||
/// <c>AddStreamedValue</c> on a read-only session fails with
|
||||
/// <c>WriteToReadOnlyFile</c>. This backend therefore opens a dedicated
|
||||
/// <c>ReadOnly = false</c> connection; it shares
|
||||
/// <see cref="HistorianClusterEndpointPicker"/> for node selection and failover but
|
||||
/// not the connection object itself.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Per-event <c>HistorianAccessError.ErrorValue</c> codes map onto
|
||||
/// <see cref="AlarmHistorianWriteOutcome"/> via
|
||||
/// <see cref="AahClientManagedAlarmEventWriter.MapOutcome"/>. A connection-class
|
||||
/// error aborts the remainder of the batch as
|
||||
/// <see cref="AlarmHistorianWriteOutcome.RetryPlease"/> and resets the connection so
|
||||
/// the next drain tick reconnects — possibly to a different cluster node.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The exact <c>HistorianEvent</c> field set required by the Historian is confirmed
|
||||
/// against a live install during the PR D.1 rollout smoke; <see cref="ToHistorianEvent"/>
|
||||
/// maps the unambiguous fields and carries operator comment / condition id as event
|
||||
/// properties.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class SdkAlarmHistorianWriteBackend : IAlarmHistorianWriteBackend, IDisposable
|
||||
{
|
||||
private static readonly ILogger Log = Serilog.Log.ForContext<SdkAlarmHistorianWriteBackend>();
|
||||
|
||||
// ErrorValue codes that point at the connection or server (transient) rather than at
// the event payload. Hitting one aborts the rest of the batch and triggers a reconnect.
private static readonly HashSet<HistorianAccessError.ErrorValue> ConnectionErrors = new()
{
    HistorianAccessError.ErrorValue.FailedToConnect,
    HistorianAccessError.ErrorValue.FailedToCreateSession,
    HistorianAccessError.ErrorValue.NoReply,
    HistorianAccessError.ErrorValue.NotReady,
    HistorianAccessError.ErrorValue.NotInitialized,
    HistorianAccessError.ErrorValue.Stopping,
    HistorianAccessError.ErrorValue.Win32Exception,
    HistorianAccessError.ErrorValue.InvalidResponse,

    // WriteToReadOnlyFile is a connection-configuration fault, not a payload fault:
    // the session was opened without ReadOnly = false (a misconfiguration or a
    // regression). The event itself is fine, so it must NOT be dead-lettered.
    // Listing it here aborts the batch and resets the connection, so the reconnect
    // path re-opens a writable (ReadOnly = false) session and the deferred events
    // drain on the next tick. See Driver.Historian.Wonderware-001.
    HistorianAccessError.ErrorValue.WriteToReadOnlyFile,
};
|
||||
|
||||
// ErrorValue codes that mean the event payload itself is bad — permanent, never retried.
private static readonly HashSet<HistorianAccessError.ErrorValue> MalformedErrors = new()
{
    HistorianAccessError.ErrorValue.InvalidArgument,
    HistorianAccessError.ErrorValue.ValidationFailed,
    HistorianAccessError.ErrorValue.NullPointerArgument,
    HistorianAccessError.ErrorValue.NotImplemented,
    HistorianAccessError.ErrorValue.NotApplicable,
};
|
||||
|
||||
private readonly HistorianConfiguration _config;
|
||||
private readonly IHistorianConnectionFactory _factory;
|
||||
private readonly HistorianClusterEndpointPicker _picker;
|
||||
private readonly object _connectionLock = new object();
|
||||
private HistorianAccess? _connection;
|
||||
private string? _activeNode;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>Creates a backend that uses the default SDK connection factory and a new endpoint picker.</summary>
/// <param name="config">The historian configuration.</param>
public SdkAlarmHistorianWriteBackend(HistorianConfiguration config)
    : this(config, new SdkHistorianConnectionFactory(), picker: null)
{
}
|
||||
|
||||
/// <summary>Creates a backend with injected dependencies (for testing).</summary>
/// <param name="config">The historian configuration.</param>
/// <param name="factory">The connection factory.</param>
/// <param name="picker">The cluster endpoint picker, or null to build one from <paramref name="config"/>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="config"/> or <paramref name="factory"/> is null.
/// </exception>
internal SdkAlarmHistorianWriteBackend(
    HistorianConfiguration config,
    IHistorianConnectionFactory factory,
    HistorianClusterEndpointPicker? picker = null)
{
    if (config is null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    if (factory is null)
    {
        throw new ArgumentNullException(nameof(factory));
    }

    _config = config;
    _factory = factory;
    _picker = picker ?? new HistorianClusterEndpointPicker(config);
}
|
||||
|
||||
/// <summary>Writes a batch of alarm events to the historian, returning outcomes for each event.</summary>
/// <remarks>
/// <para>
/// The work runs synchronously and the result is returned as a completed task.
/// Cancellation and disposal therefore surface as exceptions thrown from this call.
/// </para>
/// <para>
/// Payload faults and transport faults are handled separately. An event that cannot
/// be mapped onto a <c>HistorianEvent</c> (a null slot, or an argument the mapping
/// rejects such as an out-of-range timestamp) is classified on its own as malformed
/// input, and the batch continues. Previously such an event was treated as a transport
/// failure: it reset a healthy connection, marked the node failed, and deferred itself
/// and every event behind it on each drain tick.
/// </para>
/// </remarks>
/// <param name="events">The alarm events to write.</param>
/// <param name="cancellationToken">The cancellation token, checked before each event.</param>
/// <returns>An array of outcomes corresponding to each input event; empty for a null or empty batch.</returns>
/// <exception cref="ObjectDisposedException">The backend has been disposed.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public Task<AlarmHistorianWriteOutcome[]> WriteBatchAsync(
    AlarmHistorianEventDto[] events,
    CancellationToken cancellationToken)
{
    if (events is null || events.Length == 0)
    {
        return Task.FromResult(Array.Empty<AlarmHistorianWriteOutcome>());
    }

    var outcomes = new AlarmHistorianWriteOutcome[events.Length];

    HistorianAccess connection;
    try
    {
        connection = EnsureConnected();
    }
    catch (ObjectDisposedException)
    {
        throw;
    }
    catch (Exception ex)
    {
        // No reachable node — defer the whole batch so the lmxopcua-side SQLite
        // store-and-forward sink retains the rows for the next drain tick.
        Log.Warning(ex,
            "Alarm historian write connection unavailable — deferring {Count} event(s) as RetryPlease",
            events.Length);
        FillRemaining(outcomes, 0, AlarmHistorianWriteOutcome.RetryPlease);
        return Task.FromResult(outcomes);
    }

    for (var i = 0; i < events.Length; i++)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var dto = events[i];

        // Map the payload before touching the connection. A fault here belongs to
        // the event, not the transport, so it must not reset the connection or
        // block the events queued behind it.
        HistorianEvent? historianEvent = null;
        Exception? mappingFault = null;
        if (dto != null)
        {
            try
            {
                historianEvent = ToHistorianEvent(dto);
            }
            catch (ArgumentException ex)
            {
                mappingFault = ex;
            }
        }

        if (historianEvent is null)
        {
            // InvalidArgument is in MalformedErrors, so this yields the same outcome
            // the SDK's own malformed-input rejections receive.
            outcomes[i] = ClassifyOutcome(HistorianAccessError.ErrorValue.InvalidArgument);
            Log.Warning(mappingFault,
                "Alarm historian event at batch index {Index} (EventId {EventId}) could not be mapped to a HistorianEvent -> {Outcome}",
                i, dto?.EventId ?? "(null)", outcomes[i]);
            continue;
        }

        try
        {
            if (connection.AddStreamedValue(historianEvent, out var error))
            {
                outcomes[i] = AlarmHistorianWriteOutcome.Ack;
                continue;
            }

            var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
            if (ConnectionErrors.Contains(code))
            {
                // Connection died mid-batch — drop it and defer this event + the rest.
                Log.Warning(
                    "Alarm historian write hit connection-level error {Code} ({Desc}); resetting connection, deferring {Remaining} event(s)",
                    code, error?.ErrorDescription, events.Length - i);
                HandleConnectionError(error?.ErrorDescription);
                FillRemaining(outcomes, i, AlarmHistorianWriteOutcome.RetryPlease);
                return Task.FromResult(outcomes);
            }

            outcomes[i] = ClassifyOutcome(code);
            Log.Warning(
                "Alarm historian write rejected event {EventId}: {Code} ({Desc}) -> {Outcome}",
                dto!.EventId, code, error?.ErrorDescription, outcomes[i]);
        }
        catch (OperationCanceledException)
        {
            throw;
        }
        catch (Exception ex)
        {
            // Transport-level throw (SDK marshalling fault, broken connection) —
            // reset and defer this event + the rest.
            Log.Warning(ex,
                "Alarm historian write threw for event {EventId}; resetting connection, deferring {Remaining} event(s)",
                dto!.EventId, events.Length - i);
            HandleConnectionError(ex.Message);
            FillRemaining(outcomes, i, AlarmHistorianWriteOutcome.RetryPlease);
            return Task.FromResult(outcomes);
        }
    }

    return Task.FromResult(outcomes);
}
|
||||
|
||||
/// <summary>
/// Converts an <see cref="AlarmHistorianEventDto"/> into the SDK's
/// <c>HistorianEvent</c>. The operator comment and the originating condition id are
/// attached as event properties — operator-comment fidelity is the field the
/// value-driven fallback path cannot carry.
/// </summary>
/// <param name="dto">The alarm event data transfer object.</param>
/// <returns>The mapped HistorianEvent.</returns>
internal static HistorianEvent ToHistorianEvent(AlarmHistorianEventDto dto)
{
    // The ArchestrA SDK flags these HistorianEvent members as obsolete yet still
    // honours them on write; their successors aren't wired in the version we bind
    // against. Using them is the documented v1 behaviour (mirrors
    // HistorianDataSource.ToDto). The suppression is kept local so any other use of a
    // deprecated surface still surfaces as an error.
#pragma warning disable CS0618
    var mapped = new HistorianEvent();
    mapped.IsAlarm = true;
    mapped.Source = dto.SourceName ?? string.Empty;
    mapped.EventType = string.IsNullOrEmpty(dto.AlarmType) ? "Alarm" : dto.AlarmType;
    mapped.EventTime = new DateTime(dto.EventTimeUtcTicks, DateTimeKind.Utc);
    mapped.ReceivedTime = DateTime.UtcNow;
    mapped.Severity = dto.Severity;
    mapped.DisplayText = dto.Message ?? string.Empty;

    if (!Guid.TryParse(dto.EventId, out var eventGuid))
    {
        // Driver.Historian.Wonderware-004: an unparseable or empty EventId used to
        // leave Id as Guid.Empty, so every such alarm collided on the same id with no
        // diagnostic. A fresh Guid keeps each event uniquely identified (the historian
        // still accepts the write — outcome stays Ack — and the sender can correlate
        // the synthesized id through this warning).
        eventGuid = Guid.NewGuid();
        Log.Warning(
            "Alarm historian event has non-parseable EventId {EventId} for source {Source}; synthesizing Id={SynthesizedId}",
            dto.EventId ?? "(null)", dto.SourceName ?? "(none)", eventGuid);
    }

    mapped.Id = eventGuid;
#pragma warning restore CS0618

    var ackComment = dto.AckComment;
    if (!string.IsNullOrEmpty(ackComment))
    {
        mapped.AddProperty("Comment", ackComment, out _);
    }

    var conditionId = dto.ConditionId;
    if (!string.IsNullOrEmpty(conditionId))
    {
        mapped.AddProperty("ConditionId", conditionId, out _);
    }

    return mapped;
}
|
||||
|
||||
/// <summary>
/// Translates a <c>HistorianAccessError.ErrorValue</c> into an
/// <see cref="AlarmHistorianWriteOutcome"/> by passing it, together with its membership
/// in the connection-error and malformed-error sets, to the shared
/// <see cref="AahClientManagedAlarmEventWriter.MapOutcome"/> mapping. Exposed for unit
/// tests — the batch loop handles connection-class codes itself before calling this.
/// </summary>
/// <param name="code">The error code to classify.</param>
/// <returns>The corresponding write outcome.</returns>
internal static AlarmHistorianWriteOutcome ClassifyOutcome(HistorianAccessError.ErrorValue code)
{
    var isConnectionFault = ConnectionErrors.Contains(code);
    var isPayloadFault = MalformedErrors.Contains(code);

    return AahClientManagedAlarmEventWriter.MapOutcome(
        (int)code,
        isCommunicationError: isConnectionFault,
        isMalformedInput: isPayloadFault);
}
|
||||
|
||||
// Sets every slot from index `from` to the end of `outcomes` to `value`.
private static void FillRemaining(
    AlarmHistorianWriteOutcome[] outcomes, int from, AlarmHistorianWriteOutcome value)
{
    var index = from;
    while (index < outcomes.Length)
    {
        outcomes[index] = value;
        index++;
    }
}
|
||||
|
||||
// Returns the current write connection, opening one first if none is held. The
// connect attempt runs outside the lock; the result is published under the lock,
// and a connection that lost the race (or arrived after disposal) is closed.
private HistorianAccess EnsureConnected()
{
    if (_disposed)
    {
        throw new ObjectDisposedException(nameof(SdkAlarmHistorianWriteBackend));
    }

    // Fast path: reuse the connection that is already open.
    var cached = Volatile.Read(ref _connection);
    if (cached != null)
    {
        return cached;
    }

    var (opened, openedNode) = ConnectToAnyHealthyNode();

    lock (_connectionLock)
    {
        if (_disposed)
        {
            // Disposed while we were connecting — do not publish the new connection.
            SafeClose(opened);
            throw new ObjectDisposedException(nameof(SdkAlarmHistorianWriteBackend));
        }

        if (_connection is { } winner)
        {
            // Another caller published a connection first; keep theirs, drop ours.
            SafeClose(opened);
            return winner;
        }

        _connection = opened;
        _activeNode = openedNode;
        Log.Information("Alarm historian write connection opened to {Server}:{Port}", openedNode, _config.Port);
        return opened;
    }
}
|
||||
|
||||
// Tries each node the picker reports as healthy, in order, and returns the first
// writable event connection that opens. Each attempt is reported back to the picker
// as healthy or failed. Throws InvalidOperationException when there are no candidates
// or every candidate fails.
private (HistorianAccess Connection, string Node) ConnectToAnyHealthyNode()
{
    var healthyNodes = _picker.GetHealthyNodes();
    if (healthyNodes.Count == 0)
    {
        string reason;
        if (_picker.NodeCount == 0)
        {
            reason = "No historian nodes configured";
        }
        else
        {
            reason = $"All {_picker.NodeCount} historian nodes are in cooldown — no healthy endpoints";
        }

        throw new InvalidOperationException(reason);
    }

    Exception? mostRecentFailure = null;
    foreach (var candidate in healthyNodes)
    {
        try
        {
            var nodeConfig = CloneConfigWithServerName(candidate);
            var opened = _factory.CreateAndConnect(
                nodeConfig, HistorianConnectionType.Event, readOnly: false);
            _picker.MarkHealthy(candidate);
            return (opened, candidate);
        }
        catch (Exception ex)
        {
            _picker.MarkFailed(candidate, ex.Message);
            mostRecentFailure = ex;
            Log.Warning(ex, "Alarm historian node {Node} failed during write-connect; trying next", candidate);
        }
    }

    var detail = mostRecentFailure?.Message ?? "(no detail)";
    throw new InvalidOperationException(
        $"All {healthyNodes.Count} healthy historian candidate(s) failed during write-connect: " + detail,
        mostRecentFailure);
}
|
||||
|
||||
/// <summary>
/// Tears down the current write connection after a failure and reports the node it was bound to as failed.
/// Does nothing when no connection is currently held.
/// </summary>
/// <param name="detail">Failure detail passed to the picker; "mid-batch failure" is used when null.</param>
private void HandleConnectionError(string? detail)
{
    lock (_connectionLock)
    {
        if (_connection == null)
        {
            return;
        }

        SafeClose(_connection);
        _connection = null;

        // Detach the node before reporting so the backend no longer claims an active node.
        var nodeThatFailed = _activeNode;
        _activeNode = null;

        if (nodeThatFailed != null)
        {
            _picker.MarkFailed(nodeThatFailed, detail ?? "mid-batch failure");
        }

        Log.Warning("Alarm historian write connection reset (node={Node})", nodeThatFailed ?? "(unknown)");
    }
}
|
||||
|
||||
/// <summary>
/// Best-effort shutdown of a historian connection: closes it, then disposes it, logging (never throwing)
/// any error from either step.
/// </summary>
/// <param name="conn">The connection to close and dispose.</param>
private static void SafeClose(HistorianAccess conn)
{
    try
    {
        conn.CloseConnection(out _);
    }
    catch (Exception ex)
    {
        Log.Debug(ex, "Error closing alarm historian write connection");
    }

    // Dispose is guarded separately so a failing CloseConnection cannot skip it and leak the handle.
    try
    {
        conn.Dispose();
    }
    catch (Exception ex)
    {
        Log.Debug(ex, "Error disposing alarm historian write connection");
    }
}
|
||||
|
||||
/// <summary>
/// Builds a copy of the backend configuration whose <c>ServerName</c> is replaced by <paramref name="serverName"/>;
/// every other listed setting is carried over from <c>_config</c>.
/// </summary>
/// <param name="serverName">The node name to target with the copied configuration.</param>
/// <returns>A new <see cref="HistorianConfiguration"/> instance.</returns>
private HistorianConfiguration CloneConfigWithServerName(string serverName)
{
    var source = _config;

    return new HistorianConfiguration
    {
        Enabled = source.Enabled,
        ServerName = serverName,
        ServerNames = source.ServerNames,
        FailureCooldownSeconds = source.FailureCooldownSeconds,
        IntegratedSecurity = source.IntegratedSecurity,
        UserName = source.UserName,
        Password = source.Password,
        Port = source.Port,
        CommandTimeoutSeconds = source.CommandTimeoutSeconds,
        MaxValuesPerRead = source.MaxValuesPerRead,
        RequestTimeoutSeconds = source.RequestTimeoutSeconds,
    };
}
|
||||
|
||||
/// <summary>Marks the backend as disposed and closes the write connection if one is currently held.</summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    lock (_connectionLock)
    {
        if (_connection == null)
        {
            return;
        }

        SafeClose(_connection);
        _connection = null;
    }
}
|
||||
}
|
||||
}
|
||||
@@ -1,270 +0,0 @@
|
||||
using System;
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
// ============================================================================
|
||||
// Wire DTOs for the sidecar TCP protocol. The sidecar speaks its own legacy
|
||||
// shape (List<HistorianSample> etc.) — the .NET 10 client (PR 3.4) translates
|
||||
// to / from Core.Abstractions.DataValueSnapshot + HistoricalEvent.
|
||||
//
|
||||
// Timestamps cross the wire as DateTime ticks (long) to dodge MessagePack's
|
||||
// DateTime kind/timezone quirks; both sides convert with DateTime(ticks, Utc).
|
||||
// ============================================================================
|
||||
|
||||
/// <summary>
/// One historical data point on the sidecar wire. <see cref="Quality"/> is the raw OPC DA byte;
/// the client is responsible for mapping it to an OPC UA StatusCode.
/// </summary>
[MessagePackObject]
public sealed class HistorianSampleDto
{
    /// <summary>Value encoded as MessagePack bytes; the client decodes it according to the tag's mx_data_type.</summary>
    [Key(0)]
    public byte[]? ValueBytes { get; set; }

    /// <summary>Raw OPC DA quality byte reported by the historian SDK (low 8 bits of OpcQuality).</summary>
    [Key(1)]
    public byte Quality { get; set; }

    /// <summary>Sample timestamp expressed as UTC <see cref="System.DateTime"/> ticks.</summary>
    [Key(2)]
    public long TimestampUtcTicks { get; set; }
}
|
||||
|
||||
/// <summary>
/// One aggregate bucket on the sidecar wire. <see cref="Value"/> is null when the aggregate
/// is unavailable for the bucket.
/// </summary>
[MessagePackObject]
public sealed class HistorianAggregateSampleDto
{
    /// <summary>Aggregate value for the bucket, or null when unavailable.</summary>
    [Key(0)]
    public double? Value { get; set; }

    /// <summary>Bucket timestamp expressed as UTC <see cref="System.DateTime"/> ticks.</summary>
    [Key(1)]
    public long TimestampUtcTicks { get; set; }
}
|
||||
|
||||
/// <summary>A single event row read back from the historian.</summary>
[MessagePackObject]
public sealed class HistorianEventDto
{
    /// <summary>Identifier of the event.</summary>
    [Key(0)]
    public string EventId { get; set; } = string.Empty;

    /// <summary>Name of the event source, if any.</summary>
    [Key(1)]
    public string? Source { get; set; }

    /// <summary>Event time expressed as UTC ticks.</summary>
    [Key(2)]
    public long EventTimeUtcTicks { get; set; }

    /// <summary>Received time expressed as UTC ticks.</summary>
    [Key(3)]
    public long ReceivedTimeUtcTicks { get; set; }

    /// <summary>Display text of the event, if any.</summary>
    [Key(4)]
    public string? DisplayText { get; set; }

    /// <summary>Severity of the event.</summary>
    [Key(5)]
    public ushort Severity { get; set; }
}
|
||||
|
||||
/// <summary>An alarm event that should be written back into the historian event store.</summary>
[MessagePackObject]
public sealed class AlarmHistorianEventDto
{
    /// <summary>Identifier of the event.</summary>
    [Key(0)]
    public string EventId { get; set; } = string.Empty;

    /// <summary>Name of the alarm source.</summary>
    [Key(1)]
    public string SourceName { get; set; } = string.Empty;

    /// <summary>Identifier of the condition, if any.</summary>
    [Key(2)]
    public string? ConditionId { get; set; }

    /// <summary>Type of the alarm.</summary>
    [Key(3)]
    public string AlarmType { get; set; } = string.Empty;

    /// <summary>Alarm message text, if any.</summary>
    [Key(4)]
    public string? Message { get; set; }

    /// <summary>Severity of the alarm.</summary>
    [Key(5)]
    public ushort Severity { get; set; }

    /// <summary>Event time expressed as UTC ticks.</summary>
    [Key(6)]
    public long EventTimeUtcTicks { get; set; }

    /// <summary>Acknowledgment comment, if any.</summary>
    [Key(7)]
    public string? AckComment { get; set; }
}
|
||||
|
||||
// ===== Read Raw =====
|
||||
|
||||
/// <summary>Request for raw samples of one tag within a time window.</summary>
[MessagePackObject]
public sealed class ReadRawRequest
{
    /// <summary>Name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Window start expressed as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Window end expressed as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Upper bound on the number of values to return.</summary>
    [Key(3)]
    public int MaxValues { get; set; }

    /// <summary>Correlation identifier echoed back in the reply.</summary>
    [Key(4)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply to a <see cref="ReadRawRequest"/>.</summary>
[MessagePackObject]
public sealed class ReadRawReply
{
    /// <summary>Correlation identifier copied from the request.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>True when the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Error message when the read failed; otherwise null.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Samples returned by the read.</summary>
    [Key(3)]
    public HistorianSampleDto[] Samples { get; set; } = Array.Empty<HistorianSampleDto>();
}
|
||||
|
||||
// ===== Read Processed =====
|
||||
|
||||
/// <summary>Request for aggregated (processed) values of one tag within a time window.</summary>
[MessagePackObject]
public sealed class ReadProcessedRequest
{
    /// <summary>Name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Window start expressed as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Window end expressed as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Aggregation interval in milliseconds.</summary>
    [Key(3)]
    public double IntervalMs { get; set; }

    /// <summary>
    /// Name of the Wonderware AnalogSummary column to read: "Average", "Minimum", "Maximum", "ValueCount".
    /// The .NET 10 client translates the OPC UA aggregate enum into this column name.
    /// </summary>
    [Key(4)]
    public string AggregateColumn { get; set; } = string.Empty;

    /// <summary>Correlation identifier echoed back in the reply.</summary>
    [Key(5)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply to a <see cref="ReadProcessedRequest"/>.</summary>
[MessagePackObject]
public sealed class ReadProcessedReply
{
    /// <summary>Correlation identifier copied from the request.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>True when the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Error message when the read failed; otherwise null.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Aggregate buckets returned by the read.</summary>
    [Key(3)]
    public HistorianAggregateSampleDto[] Buckets { get; set; } = Array.Empty<HistorianAggregateSampleDto>();
}
|
||||
|
||||
// ===== Read At-Time =====
|
||||
|
||||
/// <summary>Request for the values of one tag at a list of specific timestamps.</summary>
[MessagePackObject]
public sealed class ReadAtTimeRequest
{
    /// <summary>Name of the tag to read.</summary>
    [Key(0)]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Timestamps to read at, each expressed as UTC ticks.</summary>
    [Key(1)]
    public long[] TimestampsUtcTicks { get; set; } = Array.Empty<long>();

    /// <summary>Correlation identifier echoed back in the reply.</summary>
    [Key(2)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply to a <see cref="ReadAtTimeRequest"/>.</summary>
[MessagePackObject]
public sealed class ReadAtTimeReply
{
    /// <summary>Correlation identifier copied from the request.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>True when the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Error message when the read failed; otherwise null.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Samples returned by the read.</summary>
    [Key(3)]
    public HistorianSampleDto[] Samples { get; set; } = Array.Empty<HistorianSampleDto>();
}
|
||||
|
||||
// ===== Read Events =====
|
||||
|
||||
/// <summary>Request for historian events within a time window, optionally narrowed to one source.</summary>
[MessagePackObject]
public sealed class ReadEventsRequest
{
    /// <summary>Source name to read events for; may be null.</summary>
    [Key(0)]
    public string? SourceName { get; set; }

    /// <summary>Window start expressed as UTC ticks.</summary>
    [Key(1)]
    public long StartUtcTicks { get; set; }

    /// <summary>Window end expressed as UTC ticks.</summary>
    [Key(2)]
    public long EndUtcTicks { get; set; }

    /// <summary>Upper bound on the number of events to return.</summary>
    [Key(3)]
    public int MaxEvents { get; set; }

    /// <summary>Correlation identifier echoed back in the reply.</summary>
    [Key(4)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply to a <see cref="ReadEventsRequest"/>.</summary>
[MessagePackObject]
public sealed class ReadEventsReply
{
    /// <summary>Correlation identifier copied from the request.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>True when the read succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Error message when the read failed; otherwise null.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Events returned by the read.</summary>
    [Key(3)]
    public HistorianEventDto[] Events { get; set; } = Array.Empty<HistorianEventDto>();
}
|
||||
|
||||
// ===== Write Alarm Events =====
|
||||
|
||||
/// <summary>Request to write a batch of alarm events into the historian.</summary>
[MessagePackObject]
public sealed class WriteAlarmEventsRequest
{
    /// <summary>Alarm events to write, in batch order.</summary>
    [Key(0)]
    public AlarmHistorianEventDto[] Events { get; set; } = Array.Empty<AlarmHistorianEventDto>();

    /// <summary>Correlation identifier echoed back in the reply.</summary>
    [Key(1)]
    public string CorrelationId { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply to a <see cref="WriteAlarmEventsRequest"/>, carrying per-event results.</summary>
[MessagePackObject]
public sealed class WriteAlarmEventsReply
{
    /// <summary>Correlation identifier copied from the request.</summary>
    [Key(0)]
    public string CorrelationId { get; set; } = string.Empty;

    /// <summary>True when the request as a whole succeeded.</summary>
    [Key(1)]
    public bool Success { get; set; }

    /// <summary>Error message when the request failed; otherwise null.</summary>
    [Key(2)]
    public string? Error { get; set; }

    /// <summary>Success flag per event, index-aligned with <see cref="WriteAlarmEventsRequest.Events"/>.</summary>
    [Key(3)]
    public bool[] PerEventOk { get; set; } = Array.Empty<bool>();

    /// <summary>
    /// Status per event, index-aligned with the request's Events: 0=Ack, 1=Retry, 2=Permanent.
    /// An empty array means an older sidecar that only sent <see cref="PerEventOk"/>; the client
    /// falls back to that field.
    /// </summary>
    [Key(4)]
    public byte[] PerEventStatus { get; set; } = Array.Empty<byte>();
}
|
||||
@@ -1,78 +0,0 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
/// Reads length-prefixed, kind-tagged frames from a stream. Single-consumer — do not call
/// <see cref="ReadFrameAsync"/> from multiple threads against the same instance. Mirror of
/// Driver.Galaxy.Shared.FrameReader; sidecar carries its own copy so the deletion of
/// Galaxy.Shared in PR 7.2 doesn't reach the sidecar.
/// </summary>
public sealed class FrameReader : IDisposable
{
    private readonly Stream _stream;
    private readonly bool _leaveOpen;

    /// <summary>Initializes a new instance of the <see cref="FrameReader"/> class.</summary>
    /// <param name="stream">The stream to read frames from.</param>
    /// <param name="leaveOpen">Whether to leave the stream open when disposing.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public FrameReader(Stream stream, bool leaveOpen = false)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
        _leaveOpen = leaveOpen;
    }

    /// <summary>Reads the next frame asynchronously from the stream.</summary>
    /// <param name="ct">Cancellation token for the operation.</param>
    /// <returns>A tuple of message kind and body, or null if EOF is encountered cleanly on a frame boundary.</returns>
    /// <exception cref="InvalidDataException">The length prefix is negative or exceeds <see cref="Framing.MaxFrameBodyBytes"/>.</exception>
    /// <exception cref="EndOfStreamException">The stream ended part-way through a frame.</exception>
    public async Task<(MessageKind Kind, byte[] Body)?> ReadFrameAsync(CancellationToken ct)
    {
        var lengthPrefix = new byte[Framing.LengthPrefixSize];
        if (!await ReadExactAsync(lengthPrefix, ct).ConfigureAwait(false))
        {
            return null; // clean EOF on frame boundary
        }

        // Big-endian decode; a set high bit yields a negative value, rejected by the range check.
        var length = (lengthPrefix[0] << 24) | (lengthPrefix[1] << 16) | (lengthPrefix[2] << 8) | lengthPrefix[3];
        if (length < 0 || length > Framing.MaxFrameBodyBytes)
        {
            throw new InvalidDataException($"Sidecar IPC frame length {length} out of range.");
        }

        // Read the kind byte through the async path so the call neither blocks the thread
        // nor ignores the cancellation token (Stream.ReadByte is synchronous).
        var kindBuffer = new byte[Framing.KindByteSize];
        if (!await ReadExactAsync(kindBuffer, ct).ConfigureAwait(false))
        {
            throw new EndOfStreamException("EOF after length prefix, before kind byte.");
        }

        // A zero-length body is valid: ReadExactAsync returns true without touching the stream.
        var body = new byte[length];
        if (!await ReadExactAsync(body, ct).ConfigureAwait(false))
        {
            throw new EndOfStreamException("EOF mid-frame.");
        }

        return ((MessageKind)kindBuffer[0], body);
    }

    /// <summary>Deserializes the message body to the specified type.</summary>
    /// <typeparam name="T">The type to deserialize to.</typeparam>
    /// <param name="body">The serialized message body.</param>
    /// <returns>The deserialized message.</returns>
    public static T Deserialize<T>(byte[] body) => MessagePackSerializer.Deserialize<T>(body);

    /// <summary>
    /// Fills <paramref name="buffer"/> completely from the stream.
    /// </summary>
    /// <returns>
    /// True when the buffer was filled; false when the stream ended before any byte was read.
    /// </returns>
    /// <exception cref="EndOfStreamException">The stream ended after a partial read.</exception>
    private async Task<bool> ReadExactAsync(byte[] buffer, CancellationToken ct)
    {
        var offset = 0;
        while (offset < buffer.Length)
        {
            var read = await _stream.ReadAsync(buffer, offset, buffer.Length - offset, ct).ConfigureAwait(false);
            if (read == 0)
            {
                if (offset == 0)
                {
                    return false;
                }

                throw new EndOfStreamException($"Stream ended after reading {offset} of {buffer.Length} bytes.");
            }

            offset += read;
        }

        return true;
    }

    /// <summary>Disposes the frame reader and optionally closes the underlying stream.</summary>
    public void Dispose()
    {
        if (!_leaveOpen)
        {
            _stream.Dispose();
        }
    }
}
|
||||
@@ -1,66 +0,0 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
/// Writes length-prefixed, kind-tagged MessagePack frames to a stream. Thread-safe via
/// <see cref="SemaphoreSlim"/> so concurrent producers (heartbeat + reply paths) get
/// serialized writes. Mirror of Driver.Galaxy.Shared.FrameWriter; sidecar carries its
/// own copy.
/// </summary>
public sealed class FrameWriter : IDisposable
{
    private readonly Stream _stream;
    private readonly SemaphoreSlim _gate = new(1, 1);
    private readonly bool _leaveOpen;

    /// <summary>Initializes a new instance of the FrameWriter.</summary>
    /// <param name="stream">The stream to write frames to.</param>
    /// <param name="leaveOpen">Whether to leave the stream open when disposed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public FrameWriter(Stream stream, bool leaveOpen = false)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
        _leaveOpen = leaveOpen;
    }

    /// <summary>Writes a frame with the specified message kind and serialized message.</summary>
    /// <typeparam name="T">The type of message being written.</typeparam>
    /// <param name="kind">The message kind identifier.</param>
    /// <param name="message">The message to serialize and write.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <exception cref="InvalidOperationException">The serialized body exceeds <see cref="Framing.MaxFrameBodyBytes"/>.</exception>
    public async Task WriteAsync<T>(MessageKind kind, T message, CancellationToken ct)
    {
        var body = MessagePackSerializer.Serialize(message, cancellationToken: ct);
        if (body.Length > Framing.MaxFrameBodyBytes)
        {
            throw new InvalidOperationException(
                $"Sidecar IPC frame body {body.Length} exceeds {Framing.MaxFrameBodyBytes} byte cap.");
        }

        // Header = 4-byte big-endian body length (easy to read in hex dumps) followed by the kind byte.
        // Building it as one buffer keeps the whole write path asynchronous (no Stream.WriteByte)
        // while producing the same bytes on the wire.
        var header = new byte[Framing.LengthPrefixSize + Framing.KindByteSize];
        header[0] = (byte)((body.Length >> 24) & 0xFF);
        header[1] = (byte)((body.Length >> 16) & 0xFF);
        header[2] = (byte)((body.Length >> 8) & 0xFF);
        header[3] = (byte)(body.Length & 0xFF);
        header[Framing.LengthPrefixSize] = (byte)kind;

        // The gate keeps header and body of one frame contiguous when several producers write.
        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            await _stream.WriteAsync(header, 0, header.Length, ct).ConfigureAwait(false);
            await _stream.WriteAsync(body, 0, body.Length, ct).ConfigureAwait(false);
            await _stream.FlushAsync(ct).ConfigureAwait(false);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Disposes the frame writer and releases resources.</summary>
    public void Dispose()
    {
        _gate.Dispose();
        if (!_leaveOpen)
        {
            _stream.Dispose();
        }
    }
}
|
||||
@@ -1,48 +0,0 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
/// Constants describing the length-prefixed framing of the Wonderware historian sidecar TCP protocol.
/// A frame on the wire looks like
/// <c>[4-byte big-endian length][1-byte message kind][MessagePack body]</c>,
/// where the length counts the body only and excludes the kind byte.
/// </summary>
/// <remarks>
/// The layout mirrors the Galaxy.Shared framing exactly, so the same FrameReader/FrameWriter pattern
/// works on both sides. The sidecar protocol is nevertheless independent: the .NET 4.8 server side and
/// the .NET 10 client (PR 3.4) each carry their own copy of these constants and are kept in sync by the
/// round-trip test matrix.
/// </remarks>
public static class Framing
{
    /// <summary>Size in bytes of the big-endian length prefix that opens every frame.</summary>
    public const int LengthPrefixSize = sizeof(int);

    /// <summary>Size in bytes of the message-kind tag that follows the length prefix.</summary>
    public const int KindByteSize = sizeof(byte);

    /// <summary>Maximum body size (16 MiB); the cap protects the receiver from a hostile or buggy peer.</summary>
    public const int MaxFrameBodyBytes = 16 * 1024 * 1024;
}
|
||||
|
||||
/// <summary>
/// One-byte wire tag identifying each historian sidecar message. The numeric values are part of the
/// protocol and must stay stable: never reorder them, and add new contracts only at the end.
/// The .NET 10 client and the .NET 4.8 sidecar have to agree on every value listed here.
/// </summary>
public enum MessageKind : byte
{
    // ----- Handshake -----

    /// <summary>Handshake request frame.</summary>
    Hello = 0x01,

    /// <summary>Handshake response frame.</summary>
    HelloAck = 0x02,

    // ----- Reads -----

    /// <summary>Raw-read request.</summary>
    ReadRawRequest = 0x10,

    /// <summary>Raw-read reply.</summary>
    ReadRawReply = 0x11,

    /// <summary>Processed (aggregate) read request.</summary>
    ReadProcessedRequest = 0x12,

    /// <summary>Processed (aggregate) read reply.</summary>
    ReadProcessedReply = 0x13,

    /// <summary>At-time read request.</summary>
    ReadAtTimeRequest = 0x14,

    /// <summary>At-time read reply.</summary>
    ReadAtTimeReply = 0x15,

    /// <summary>Event read request.</summary>
    ReadEventsRequest = 0x16,

    /// <summary>Event read reply.</summary>
    ReadEventsReply = 0x17,

    // ----- Writes -----

    /// <summary>Alarm-event write request.</summary>
    WriteAlarmEventsRequest = 0x20,

    /// <summary>Alarm-event write reply.</summary>
    WriteAlarmEventsReply = 0x21,
}
|
||||
@@ -1,41 +0,0 @@
|
||||
using MessagePack;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
/// Opening frame of every connection. It announces the sidecar protocol version and carries the
/// per-process shared secret that the supervisor handed over at spawn time.
/// </summary>
[MessagePackObject]
public sealed class Hello
{
    /// <summary>Protocol major version used as the default for new handshake messages.</summary>
    public const int CurrentMajor = 1;

    /// <summary>Protocol minor version used as the default for new handshake messages.</summary>
    public const int CurrentMinor = 0;

    /// <summary>Protocol major version advertised by the sender.</summary>
    [Key(0)]
    public int ProtocolMajor { get; set; } = CurrentMajor;

    /// <summary>Protocol minor version advertised by the sender.</summary>
    [Key(1)]
    public int ProtocolMinor { get; set; } = CurrentMinor;

    /// <summary>Name of the connecting peer.</summary>
    [Key(2)]
    public string PeerName { get; set; } = string.Empty;

    /// <summary>Per-process shared secret, checked against the value the supervisor passed at spawn time.</summary>
    [Key(3)]
    public string SharedSecret { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>Reply sent in answer to a <see cref="Hello"/> handshake frame.</summary>
[MessagePackObject]
public sealed class HelloAck
{
    /// <summary>Protocol major version of the responder.</summary>
    [Key(0)]
    public int ProtocolMajor { get; set; } = Hello.CurrentMajor;

    /// <summary>Protocol minor version of the responder.</summary>
    [Key(1)]
    public int ProtocolMinor { get; set; } = Hello.CurrentMinor;

    /// <summary>True when the handshake was accepted.</summary>
    [Key(2)]
    public bool Accepted { get; set; }

    /// <summary>Reason for rejection when <see cref="Accepted"/> is false; otherwise null.</summary>
    [Key(3)]
    public string? RejectReason { get; set; }

    /// <summary>Host name of the server.</summary>
    [Key(4)]
    public string HostName { get; set; } = string.Empty;
}
|
||||
-334
@@ -1,334 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MessagePack;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
|
||||
/// Sidecar-side dispatcher. Each post-Hello frame routes by <see cref="MessageKind"/> to
|
||||
/// the right historian operation and the result frame is written back through the same
|
||||
/// pipe. Per-call exceptions are caught and surfaced as <c>Success=false, Error=...</c>
|
||||
/// replies so a single bad request doesn't kill the connection.
|
||||
/// </summary>
|
||||
public sealed class HistorianFrameHandler : IFrameHandler
|
||||
{
|
||||
// WriteAlarmEventsReply.PerEventStatus byte semantics: 0=Ack, 1=Retry, 2=Permanent.
|
||||
private const byte StatusAck = 0;
|
||||
private const byte StatusRetry = 1;
|
||||
private const byte StatusPermanent = 2;
|
||||
|
||||
private readonly IHistorianDataSource _historian;
|
||||
private readonly IAlarmEventWriter? _alarmWriter;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
/// <summary>Creates a frame handler bound to a historian data source, a logger and an optional alarm writer.</summary>
/// <param name="historian">Historian data source that serves the read requests.</param>
/// <param name="logger">Logger used for warnings raised while handling frames.</param>
/// <param name="alarmWriter">Alarm event writer used for write-backs; may be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="historian"/> or <paramref name="logger"/> is null.</exception>
public HistorianFrameHandler(
    IHistorianDataSource historian,
    ILogger logger,
    IAlarmEventWriter? alarmWriter = null)
{
    if (historian is null)
    {
        throw new ArgumentNullException(nameof(historian));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _historian = historian;
    _logger = logger;
    _alarmWriter = alarmWriter;
}
|
||||
|
||||
/// <summary>Routes an incoming frame to the handler that matches its message kind.</summary>
/// <param name="kind">Message kind of the received frame.</param>
/// <param name="body">Serialized frame body.</param>
/// <param name="writer">Frame writer used to send the reply.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The task of the selected handler; unsupported kinds are logged and dropped.</returns>
public Task HandleAsync(MessageKind kind, byte[] body, FrameWriter writer, CancellationToken ct)
{
    switch (kind)
    {
        case MessageKind.ReadRawRequest:
            return HandleReadRawAsync(body, writer, ct);

        case MessageKind.ReadProcessedRequest:
            return HandleReadProcessedAsync(body, writer, ct);

        case MessageKind.ReadAtTimeRequest:
            return HandleReadAtTimeAsync(body, writer, ct);

        case MessageKind.ReadEventsRequest:
            return HandleReadEventsAsync(body, writer, ct);

        case MessageKind.WriteAlarmEventsRequest:
            return HandleWriteAlarmEventsAsync(body, writer, ct);

        default:
            return UnknownAsync(kind);
    }
}
|
||||
|
||||
/// <summary>Logs a warning for a frame kind this handler does not support; no reply is written.</summary>
/// <param name="kind">The unsupported message kind.</param>
/// <returns>An already-completed task.</returns>
private Task UnknownAsync(MessageKind kind)
{
    var done = Task.CompletedTask;
    _logger.Warning("Sidecar received unsupported frame kind {Kind}; dropping", kind);
    return done;
}
|
||||
|
||||
/// <summary>
/// Serves a raw-read request: queries the historian for the requested tag and window and writes a
/// <see cref="ReadRawReply"/> back. Failures of the read are reported in the reply, not thrown.
/// </summary>
/// <param name="body">Serialized <see cref="ReadRawRequest"/>.</param>
/// <param name="writer">Frame writer used to send the reply.</param>
/// <param name="ct">Cancellation token.</param>
private async Task HandleReadRawAsync(byte[] body, FrameWriter writer, CancellationToken ct)
{
    var request = MessagePackSerializer.Deserialize<ReadRawRequest>(body);
    var response = new ReadRawReply { CorrelationId = request.CorrelationId };

    try
    {
        // Ticks are rehydrated as UTC DateTimes inside the try so a bad tick value becomes an error reply.
        var startUtc = new DateTime(request.StartUtcTicks, DateTimeKind.Utc);
        var endUtc = new DateTime(request.EndUtcTicks, DateTimeKind.Utc);

        var samples = await _historian
            .ReadRawAsync(request.TagName, startUtc, endUtc, request.MaxValues, ct)
            .ConfigureAwait(false);

        response.Success = true;
        response.Samples = ToWire(samples);
    }
    catch (Exception ex)
    {
        _logger.Warning(ex, "Sidecar ReadRaw failed for {Tag}", request.TagName);
        response.Error = ex.Message;
        response.Success = false;
    }

    await writer.WriteAsync(MessageKind.ReadRawReply, response, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Serves a processed-read request: asks the historian for aggregate buckets over the requested window
/// and writes a <see cref="ReadProcessedReply"/> back. Failures of the read are reported in the reply.
/// </summary>
/// <param name="body">Serialized <see cref="ReadProcessedRequest"/>.</param>
/// <param name="writer">Frame writer used to send the reply.</param>
/// <param name="ct">Cancellation token.</param>
private async Task HandleReadProcessedAsync(byte[] body, FrameWriter writer, CancellationToken ct)
{
    var request = MessagePackSerializer.Deserialize<ReadProcessedRequest>(body);
    var response = new ReadProcessedReply { CorrelationId = request.CorrelationId };

    try
    {
        // Ticks are rehydrated as UTC DateTimes inside the try so a bad tick value becomes an error reply.
        var startUtc = new DateTime(request.StartUtcTicks, DateTimeKind.Utc);
        var endUtc = new DateTime(request.EndUtcTicks, DateTimeKind.Utc);

        var buckets = await _historian
            .ReadAggregateAsync(
                request.TagName,
                startUtc,
                endUtc,
                request.IntervalMs,
                request.AggregateColumn,
                ct)
            .ConfigureAwait(false);

        response.Success = true;
        response.Buckets = ToWire(buckets);
    }
    catch (Exception ex)
    {
        _logger.Warning(ex, "Sidecar ReadProcessed failed for {Tag}", request.TagName);
        response.Error = ex.Message;
        response.Success = false;
    }

    await writer.WriteAsync(MessageKind.ReadProcessedReply, response, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Serves an at-time read request: converts the requested tick values to UTC timestamps, queries the
/// historian, and writes a <see cref="ReadAtTimeReply"/> back. Failures of the read are reported in
/// the reply rather than thrown.
/// </summary>
/// <param name="body">Serialized <see cref="ReadAtTimeRequest"/>.</param>
/// <param name="writer">Frame writer used to send the reply.</param>
/// <param name="ct">Cancellation token.</param>
private async Task HandleReadAtTimeAsync(byte[] body, FrameWriter writer, CancellationToken ct)
{
    var req = MessagePackSerializer.Deserialize<ReadAtTimeRequest>(body);
    var reply = new ReadAtTimeReply { CorrelationId = req.CorrelationId };
    try
    {
        // MessagePack deserializes an absent or explicit-nil array as null, not Array.Empty.
        // Treat that as "no timestamps" instead of surfacing a NullReferenceException message,
        // the same way the alarm-write handler normalises its Events array.
        var ticks = req.TimestampsUtcTicks ?? Array.Empty<long>();
        var timestamps = Array.ConvertAll(ticks, t => new DateTime(t, DateTimeKind.Utc));

        var samples = await _historian.ReadAtTimeAsync(req.TagName, timestamps, ct).ConfigureAwait(false);
        reply.Success = true;
        reply.Samples = ToWire(samples);
    }
    catch (Exception ex)
    {
        _logger.Warning(ex, "Sidecar ReadAtTime failed for {Tag}", req.TagName);
        reply.Success = false;
        reply.Error = ex.Message;
    }

    await writer.WriteAsync(MessageKind.ReadAtTimeReply, reply, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Serves an event-read request: queries the historian for events of the requested source and window
/// and writes a <see cref="ReadEventsReply"/> back. Failures of the read are reported in the reply.
/// </summary>
/// <param name="body">Serialized <see cref="ReadEventsRequest"/>.</param>
/// <param name="writer">Frame writer used to send the reply.</param>
/// <param name="ct">Cancellation token.</param>
private async Task HandleReadEventsAsync(byte[] body, FrameWriter writer, CancellationToken ct)
{
    var request = MessagePackSerializer.Deserialize<ReadEventsRequest>(body);
    var response = new ReadEventsReply { CorrelationId = request.CorrelationId };

    try
    {
        // Ticks are rehydrated as UTC DateTimes inside the try so a bad tick value becomes an error reply.
        var startUtc = new DateTime(request.StartUtcTicks, DateTimeKind.Utc);
        var endUtc = new DateTime(request.EndUtcTicks, DateTimeKind.Utc);

        var events = await _historian
            .ReadEventsAsync(request.SourceName, startUtc, endUtc, request.MaxEvents, ct)
            .ConfigureAwait(false);

        response.Success = true;
        response.Events = ToWire(events);
    }
    catch (Exception ex)
    {
        _logger.Warning(ex, "Sidecar ReadEvents failed for source {Source}", request.SourceName);
        response.Error = ex.Message;
        response.Success = false;
    }

    await writer.WriteAsync(MessageKind.ReadEventsReply, response, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Serves a WriteAlarmEvents request: decodes the batch, classifies every event, hands the
/// well-formed events to the configured alarm-event writer, and writes a
/// <c>WriteAlarmEventsReply</c> frame carrying one status per original event.
/// </summary>
/// <remarks>
/// Structurally malformed (poison) events are never sent to the writer and are always
/// reported as Permanent, including when the writer itself fails; only the events that were
/// actually handed to the writer fall back to Retry in that case. When no writer is
/// configured the whole batch is reported as Retry with <c>Success = false</c>.
/// </remarks>
/// <param name="body">MessagePack-encoded <c>WriteAlarmEventsRequest</c>.</param>
/// <param name="writer">Frame writer the reply is sent through.</param>
/// <param name="ct">Cancellation token passed to the alarm-event writer and to the reply write.</param>
private async Task HandleWriteAlarmEventsAsync(byte[] body, FrameWriter writer, CancellationToken ct)
{
    var req = MessagePackSerializer.Deserialize<WriteAlarmEventsRequest>(body);

    // MessagePack deserializes an absent or explicit-nil array as null, not Array.Empty.
    // Normalise here so every path below can safely dereference .Length without an NRE.
    req.Events ??= Array.Empty<AlarmHistorianEventDto>();

    var reply = new WriteAlarmEventsReply { CorrelationId = req.CorrelationId };

    if (_alarmWriter is null)
    {
        reply.Success = false;
        reply.Error = "Sidecar not configured with an alarm-event writer.";
        reply.PerEventOk = new bool[req.Events.Length];
        reply.PerEventStatus = AllStatus(req.Events.Length, StatusRetry);
        await writer.WriteAsync(MessageKind.WriteAlarmEventsReply, reply, ct).ConfigureAwait(false);
        return;
    }

    // Classify each event before touching the writer: structurally-malformed
    // (poison) events can never be persisted, so mark them Permanent and exclude
    // them from the writer batch. Only the well-formed remainder is handed to the
    // writer, whose bool[] result is mapped back onto the original indices.
    // Classification cannot throw, so it lives outside the try: that way a writer
    // failure cannot erase the Permanent verdict of the poison events.
    var status = new byte[req.Events.Length];
    var writable = new List<AlarmHistorianEventDto>(req.Events.Length);
    var originalIndex = new List<int>(req.Events.Length);
    for (var i = 0; i < req.Events.Length; i++)
    {
        if (IsStructurallyMalformed(req.Events[i]))
        {
            status[i] = StatusPermanent;
        }
        else
        {
            originalIndex.Add(i);
            writable.Add(req.Events[i]);
        }
    }

    try
    {
        // Aligned 1:1 to `writable`; empty when every event was poison (writer skipped).
        var perEvent = writable.Count == 0
            ? Array.Empty<bool>()
            : await _alarmWriter.WriteAsync(writable.ToArray(), ct).ConfigureAwait(false);

        for (var i = 0; i < originalIndex.Count; i++)
        {
            // A result array shorter than the batch leaves the uncovered slots on Retry.
            var ok = i < perEvent.Length && perEvent[i];
            status[originalIndex[i]] = ok ? StatusAck : StatusRetry;
        }

        // Whole-batch Success stays true even when some events failed — per-event
        // PerEventStatus slots carry the granular result (Ack / Retry / Permanent);
        // the SQLite drain worker acks 0, retries 1, and dead-letters 2.
        reply.Success = true;
    }
    catch (Exception ex)
    {
        _logger.Warning(ex, "Sidecar WriteAlarmEvents failed");
        reply.Success = false;
        reply.Error = ex.Message;

        // The writer failed as a whole: everything that was handed to it must be
        // retried. Poison events were never sent, so they keep their Permanent status
        // and are still dead-lettered instead of looping to the retry cap.
        foreach (var index in originalIndex)
        {
            status[index] = StatusRetry;
        }
    }

    reply.PerEventStatus = status;

    // PerEventOk is kept populated for rolling-deploy back-compat with an older client.
    reply.PerEventOk = StatusToOk(status);

    await writer.WriteAsync(MessageKind.WriteAlarmEventsReply, reply, ct).ConfigureAwait(false);
}
|
||||
|
||||
/// <summary>
/// Decides whether an alarm event is poison, i.e. structurally malformed so that the
/// historian event store can never persist it no matter how often the write is retried.
/// Poison events are marked Permanent so the store-and-forward sink dead-letters them at
/// once instead of looping to the retry cap. An event is poison when it is <c>null</c>,
/// has a blank source name or alarm type, or carries a non-positive event timestamp.
/// </summary>
/// <param name="e">The candidate alarm event.</param>
/// <returns><c>true</c> when the event is structurally malformed; otherwise <c>false</c>.</returns>
internal static bool IsStructurallyMalformed(AlarmHistorianEventDto e)
{
    if (e is null)
    {
        return true;
    }

    if (string.IsNullOrWhiteSpace(e.SourceName))
    {
        return true;
    }

    if (string.IsNullOrWhiteSpace(e.AlarmType))
    {
        return true;
    }

    return e.EventTimeUtcTicks <= 0;
}
|
||||
|
||||
/// <summary>
/// Builds a status array of the given length with every slot set to the same value.
/// </summary>
/// <param name="length">Number of slots in the returned array.</param>
/// <param name="value">Status byte stored in every slot.</param>
/// <returns>A new array of <paramref name="length"/> bytes, each equal to <paramref name="value"/>.</returns>
private static byte[] AllStatus(int length, byte value)
{
    var filled = new byte[length];
    var slot = filled.Length;
    while (slot > 0)
    {
        slot--;
        filled[slot] = value;
    }

    return filled;
}
|
||||
|
||||
/// <summary>
/// Projects a per-event status array onto a per-event boolean array: a slot is
/// <c>true</c> exactly when its status equals <c>StatusAck</c>.
/// </summary>
/// <param name="status">Per-event status bytes.</param>
/// <returns>A new array of the same length as <paramref name="status"/>.</returns>
private static bool[] StatusToOk(byte[] status)
{
    var acked = new bool[status.Length];
    var slot = 0;
    foreach (var code in status)
    {
        acked[slot] = code == StatusAck;
        slot++;
    }

    return acked;
}
|
||||
|
||||
/// <summary>
/// Converts historian samples into their wire DTOs, preserving order. A sample value is
/// MessagePack-serialized into <c>ValueBytes</c>; a <c>null</c> value stays <c>null</c>.
/// </summary>
/// <param name="samples">Samples returned by the historian.</param>
/// <returns>One DTO per input sample, in the same order.</returns>
private static HistorianSampleDto[] ToWire(List<HistorianSample> samples)
{
    var wire = new HistorianSampleDto[samples.Count];
    var next = 0;
    foreach (var sample in samples)
    {
        var payload = sample.Value is null ? null : MessagePackSerializer.Serialize(sample.Value);
        wire[next] = new HistorianSampleDto
        {
            ValueBytes = payload,
            Quality = sample.Quality,
            TimestampUtcTicks = sample.TimestampUtc.Ticks,
        };
        next++;
    }

    return wire;
}
|
||||
|
||||
/// <summary>
/// Converts historian aggregate samples into their wire DTOs, preserving order. Each DTO
/// carries the sample value and its UTC timestamp expressed in ticks.
/// </summary>
/// <param name="samples">Aggregate samples returned by the historian.</param>
/// <returns>One DTO per input sample, in the same order.</returns>
private static HistorianAggregateSampleDto[] ToWire(List<HistorianAggregateSample> samples)
{
    return samples
        .ConvertAll(sample => new HistorianAggregateSampleDto
        {
            Value = sample.Value,
            TimestampUtcTicks = sample.TimestampUtc.Ticks,
        })
        .ToArray();
}
|
||||
|
||||
/// <summary>
/// Converts backend historian events into their wire DTOs, preserving order. The event id
/// is sent as its string form and both timestamps are sent as ticks.
/// </summary>
/// <param name="events">Events returned by the historian backend.</param>
/// <returns>One wire DTO per input event, in the same order.</returns>
private static HistorianEventDto[] ToWire(List<Backend.HistorianEventDto> events)
{
    var wire = new HistorianEventDto[events.Count];
    var next = 0;
    foreach (var source in events)
    {
        var converted = new HistorianEventDto
        {
            EventId = source.Id.ToString(),
            Source = source.Source,
            EventTimeUtcTicks = source.EventTime.Ticks,
            ReceivedTimeUtcTicks = source.ReceivedTime.Ticks,
            DisplayText = source.DisplayText,
            Severity = source.Severity,
        };
        wire[next] = converted;
        next++;
    }

    return wire;
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Strategy for persisting alarm events into the Wonderware Alarm &amp; Events log. PR 3.W
/// supplies a real implementation that drives the aahClient SDK; PR 3.3 ships the
/// contract + a default null implementation so the sidecar can boot without one.
/// </summary>
public interface IAlarmEventWriter
{
    /// <summary>
    /// Writes a batch of alarm events. The SQLite store-and-forward sink retries failed
    /// slots on the next drain tick.
    /// </summary>
    /// <param name="events">Alarm events to write.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>
    /// One boolean per input event, in input order: <c>true</c> when the event was
    /// persisted, <c>false</c> when it should be retried.
    /// </returns>
    Task<bool[]> WriteAsync(AlarmHistorianEventDto[] events, CancellationToken cancellationToken);
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user