Compare commits

...

110 Commits

Author SHA1 Message Date
Joseph Doherty 38cf17917a docs(historiangw): test count 741->744/723 (final-review polish tests)
Claude-Session: https://claude.ai/code/session_012SDSQ3AcaXqPcBtDESBRii
2026-06-24 19:52:02 -04:00
Joseph Doherty 71cec3dcff docs(historiangw): production-readiness warnings + refreshed test count (719->741)
Non-Development boots now also warn (warn-only) on relative runtime-artifact
paths + secret hygiene (pending.md D2/D3); TLS warn predicate broadened to
non-Development (Production + Staging).

Claude-Session: https://claude.ai/code/session_012SDSQ3AcaXqPcBtDESBRii
2026-06-24 19:43:45 -04:00
Joseph Doherty 99c153ac23 docs(historiangw): refresh test count (719 total / 698 green on macOS) 2026-06-24 18:30:27 -04:00
Joseph Doherty 5f743d05d6 docs(historiangw): single TLS port (Http1AndHttp2) deployment posture
Dev two plaintext endpoints (appsettings.Development.json) unchanged;
production now uses a single Kestrel Https Http1AndHttp2 endpoint (ALPN)
multiplexing dashboard + gRPC over one port; warn-only without TLS (valid
behind reverse proxy / Kubernetes ingress).

Claude-Session: https://claude.ai/code/session_012SDSQ3AcaXqPcBtDESBRii
2026-06-24 16:20:23 -04:00
Joseph Doherty b80abbb14b docs(index): HistorianGateway store-forward now FasterLog-backed; refresh test count (702/681) 2026-06-24 11:19:56 -04:00
Joseph Doherty 6c2d16d4af docs: refresh HistorianGateway + GalaxyRepository status in index
HistorianGateway is now pushed to gitea (gitea.dohertylan.com/dohertj2/historiangw), and
ZB.MOM.WW.GalaxyRepository is published to the Gitea feed and consumed as a PackageReference
(no longer a cross-repo ProjectReference). Updates the sister-project row, the component
table, and the GalaxyRepository narrative; test figure 584 green -> 590 total (584 on macOS).
2026-06-24 07:28:25 -04:00
Joseph Doherty a08ddab9dd chore: retire unused ZB.MOM.WW.SPHistorianClient (stale partial port; superseded by histsdk vendored in HistorianGateway; no consumers, not on feed) 2026-06-24 06:45:10 -04:00
Joseph Doherty 744eb090ac docs(scadaproj): index ZB.MOM.WW.HistorianGateway sidecar + GalaxyRepository shared lib
- Add HistorianGateway to the Runtime/implementation table (single-process
  .NET 10 x64 gRPC sidecar; no COM/x86; 584 tests; local only, not yet
  pushed to gitea)
- Update "What this repository is" count (five → six pieces of source;
  add GalaxyRepository)
- Add HistorianGateway paragraph to Cross-project relationships / Net effect
  (independent sidecar; no runtime coupling to the other three; depends on
  shared GalaxyRepository lib via ProjectReference)
- Add ZB.MOM.WW.GalaxyRepository row to Component normalization table +
  full description paragraph (built 0.1.0; consumed by HistorianGateway;
  mxaccessgw adoption is a follow-on; not yet published to Gitea feed)
- Add HistorianGateway primary commands block (build/test/run/live-integration)
- Extend Shared GLAuth note to cover HistorianGateway
2026-06-24 00:41:29 -04:00
Joseph Doherty 94512acf1f fix(galaxyrepo): drop no-op ValidateOnStart (consumer owns validation) 2026-06-23 20:36:28 -04:00
Joseph Doherty 2c6c764d3c test(galaxyrepo): projector + cache tests; dispose semaphores; pack 0.1.0 2026-06-23 20:34:32 -04:00
Joseph Doherty a30f8551e9 feat(galaxyrepo): reusable gRPC service + AddZbGalaxyRepository DI 2026-06-23 20:26:59 -04:00
Joseph Doherty afd0287f54 feat(galaxyrepo): hierarchy cache + snapshot + refresh service + projector 2026-06-23 20:22:35 -04:00
Joseph Doherty 1041f87b59 feat(galaxyrepo): SQL browse provider (hierarchy + attributes) 2026-06-23 20:12:33 -04:00
Joseph Doherty 5572edda85 feat(galaxyrepo): canonical galaxy_repository.v1 proto (neutral namespace) 2026-06-23 20:05:39 -04:00
Joseph Doherty aff7264df8 feat(galaxyrepo): scaffold ZB.MOM.WW.GalaxyRepository shared lib 2026-06-23 19:48:43 -04:00
Joseph Doherty 510b0010d6 docs(historian-gateway): implementation plan + task ledger (31 tasks) 2026-06-23 19:43:08 -04:00
Joseph Doherty 42ad31aded docs(historian-gateway): brainstormed design for ZB.MOM.WW.HistorianGateway sidecar 2026-06-23 19:31:54 -04:00
Joseph Doherty e3c0503a4f docs(sphistorianclient): mark RemoteGrpc (2023 R2) live-verified 2026-06-19 06:57:06 -04:00
Joseph Doherty a0527f9b5a fix(sphistorianclient): gRPC auth handshake uses StorageService.ValidateClientCredential
The RemoteGrpc orchestrator drove the SSPI/NTLM token loop through
HistoryService.ExchangeKey, which the 2023 R2 contract analysis shows is a
separate key-exchange/cert op — not the credential handshake. The server
rejected the NTLM Type-1 token at round 0. The Negotiate loop belongs on
StorageService.ValidateClientCredential (Handle/InBuff -> Status/OutBuff;
field names match the 2020 native contract). Live-verified end-to-end against
a 2023 R2 Historian (wonder-sql-vd03): SysTimeSec raw read returns correct
timestamped values.
2026-06-19 06:56:44 -04:00
Joseph Doherty 5f7d7e1b58 docs(sphistorianclient): document HISTORIAN_PORT env var; mark plan tasks complete 2026-06-19 06:09:43 -04:00
Joseph Doherty 78418346df build(sphistorianclient): pack 0.1.0 nupkg 2026-06-19 06:02:05 -04:00
Joseph Doherty 4920b89666 docs(sphistorianclient): correct retrieval-mode count (15) + EnsureTag verification scope 2026-06-19 06:01:07 -04:00
Joseph Doherty 989db9317d docs(sphistorianclient): add CLAUDE.md + README.md 2026-06-19 05:58:13 -04:00
Joseph Doherty 81bf7322f0 feat(sphistorianclient): add AddZbSpHistorianClient DI extension 2026-06-19 05:53:56 -04:00
Joseph Doherty 8033a7f12d fix(sphistorianclient): resolve port build/test fallout 2026-06-19 05:49:22 -04:00
Joseph Doherty 63cddfb65b feat(sphistorianclient): port SDK source + tests, rebrand namespace to ZB.MOM.WW.SPHistorianClient 2026-06-19 05:45:06 -04:00
Joseph Doherty 965f5006f2 feat(sphistorianclient): scaffold shared library skeleton (props, csprojs, slnx) 2026-06-19 05:40:10 -04:00
Joseph Doherty 294da8b2db docs(sphistorianclient): implementation plan + task tracking 2026-06-19 05:36:24 -04:00
Joseph Doherty bbb7942788 docs(sphistorianclient): approved design for ZB.MOM.WW.SPHistorianClient port 2026-06-19 05:29:51 -04:00
Joseph Doherty d5b134b117 docs: add MES + Delmia-DNC integration API/MXAccess specs
mes-delmia-integration-api.md: endpoints, request/response DTOs, and the MXAccess flag handshake for MESAPI (in-repo MesNotifier) and DelmiaIntegration (DNC Downloader.asmx -> WWNotifier /notify -> Galaxy $DelmiaReceiver). mesrec.md / nj.md: live Galaxy receiver + reactor attribute references.
2026-06-17 06:52:36 -04:00
Joseph Doherty eb8b44c29d loader: purge legacy driver in overlay namespace on teardown (self-heal nw-uns-modbus placeholder) 2026-06-08 07:07:22 -04:00
Joseph Doherty a6fa36043a loader: equipment is driver-less (drop Modbus placeholder, NULL DriverInstanceId) 2026-06-08 06:42:31 -04:00
Joseph Doherty 05a4a547f4 feat(loader): canonical EQ-+uuid EquipmentIds (passes OtOpcUa full DraftValidator); clean by UnsLine scope 2026-06-07 11:18:39 -04:00
Joseph Doherty 4d57e34ff3 docs(loader): record live-values verification + 396/1036 explanation for company overlay 2026-06-07 06:08:36 -04:00
Joseph Doherty b3d8990a0f fix(loader): keep empty folderPath distinct in vtag ids; dedupe verify args; readme wait-seconds 2026-06-07 05:07:00 -04:00
Joseph Doherty 5655b75fe6 feat(loader): company overlay as VirtualTags mirroring the galaxy mirror + verify --require-good 2026-06-07 04:59:51 -04:00
Joseph Doherty dce6f83488 loader: add populate-equipment (company-shape Equipment overlay) + scope verify-equipment
populate-equipment loads the Northwind Enterprise/Site/Area/Line/Equipment/Signal
shape from company-uns.json as a second Equipment-kind namespace (nw-uns) alongside
the galaxy mirror — 3 areas / 8 lines / 40 equipment / 1036 signals. Friendly
DisplayName, stable logical-Id NodeId. verify-equipment now scopes to the nw-area-*
overlay by default (--all for the whole tree). Verified live on :4840 against OtOpcUa
master's Equipment-namespace materialization (structure-only; leaves are
BadWaitingForInitialData). clean now drops the overlay too.
2026-06-06 16:19:53 -04:00
Joseph Doherty fd34e25cb1 feat(uns-loader): verify-equipment — recursive Equipment UNS tree browse + leaf count
browse_summary assumes the flat 2-level Galaxy hierarchy; the Equipment tree is deep
(Area/Line/Equipment/[FolderPath]/Signal). Add browse_tree (recursive leaf descent) + a
verify-equipment subcommand that reports/asserts the leaf signal count (--expect N), for
verifying OtOpcUa equipment-namespace structure materialisation. Smoke-tested against a live
:4840 (40 folders / 396 leaf signals).
2026-06-06 15:25:17 -04:00
Joseph Doherty eb26bf3248 Add Galaxy UNS artifacts + reloadable OtOpcUa loader tool
galaxy-hierarchy.json: full AVEVA Galaxy DEV hierarchy pulled live via the
MxGateway .NET client (129 objects, 14k attrs). company-uns.json/.tree.txt +
gen_uns.py: a fake-company (Northwind) ISA-95 UNS modeled on OtOpcUa's
Cluster->Namespace->Area->Line->Equipment->Tag schema, grounded in the 40
TestMachine instances. otopcua-uns-loader/: reloadable generate/populate/verify/
clean tool that recreates + verifies the galaxy mirror (396 live tags across 40
machines) in OtOpcUa's config DB after a rebuild.
2026-06-06 14:22:25 -04:00
Joseph Doherty e5a609be83 docs(theme): mark themeissues #6 resolved in 0.3.1
Interactive-render nav fix (CSS display:none-when-closed + nav-state.js
MutationObserver re-wire) shipped in 0.3.1 and verified — ScadaBridge Central UI
NavCollapseTests now pass. All six issues now resolved (5 fixed, 1 tradeoff).
2026-06-05 08:32:03 -04:00
Joseph Doherty f1efe6e081 fix(theme): 0.3.1 — interactive-render nav backstop (issue #6)
Under an interactive Blazor render mode the runtime replaces the prerendered
<details> after DOMContentLoaded, so nav-state.js (wired on load, re-run only on
'enhancedload') never wires the live rail — no aria sync, no persistence, no
active-reveal — and native <details> content-hiding is unreliable, leaving a
collapsed section's items visible. 0.3.1:
- nav-state.js: add a MutationObserver backstop that re-runs apply() when
  details.rail-section nodes are (re)inserted; idempotent via the per-element
  init guard, loop-safe (childList-only + active-reveal's !open guard).
- layout.css: explicit .rail-section:not([open]) > .rail-section-body{display:none}
  so visual collapse works across all render modes.
- themeissues.md: document issue #6; Directory.Build.props 0.3.0 -> 0.3.1.
48 bUnit tests green.
2026-06-05 07:18:30 -04:00
Joseph Doherty 0e41e7c2e4 fix(theme): resolve nav/login kit issues + bump 0.2.1 -> 0.3.0
Addresses ZB.MOM.WW.Theme/themeissues.md:
- #1 NavRailSection <summary> renders aria-expanded (SSR from Expanded),
  kept in sync by nav-state.js on restore + toggle.
- #2 nav-state.js auto-expands the section holding a.rail-link.active
  (transient via data-zbnav-transient — does not overwrite saved state).
- #3 nav-state.js re-applies on Blazor 'enhancedload' (idempotent via
  per-element init guard).
- #5 LoginCard wraps product in span.login-product + optional Heading
  override param.
- #4 documented as an accepted client-only-persistence tradeoff (no code change).

+4 bUnit tests (48 total, all green).
2026-06-05 04:42:24 -04:00
Joseph Doherty 5f97c9d1ed docs(glauth): point all dev/test LDAP at the shared GLAuth on 10.100.0.35
deployment.md / CLAUDE.md / env_vars.md: the per-app LDAP (scadabridge-ldap
container, OtOpcUa DevStubMode, per-box C:\publish\glauth) is replaced by one
shared zb-shared-glauth on 10.100.0.35:3893 (dc=zb,dc=local); source of truth
infra/glauth/. Fixed stale baseDNs (dc=lmxopcua/dc=otopcua -> dc=zb).
2026-06-04 16:37:52 -04:00
Joseph Doherty 9d373efbe0 docs(glauth): mark shared-GLAuth design implemented + all plan tasks complete 2026-06-04 16:21:13 -04:00
Joseph Doherty 4c0f1eaaf7 fix(glauth): rename OPC/Gw testers to avoid username/group case-collision
glauth exposes each group as cn=<Group> under ou=users, so a case-insensitive
(cn=x) search matched both the user and the group (2 entries -> the shared
ZB.MOM.WW.Auth.Ldap 'exactly one entry' rule failed the bind). Renamed the 4
colliding testers (readonly/writetune/alarmack/gwreader) + the 2 siblings for
consistency: opc-readonly/opc-writeop/opc-writetune/opc-writeconfig/opc-alarmack
and gw-viewer. Verified gw-viewer logs into the MxGateway dashboard as Viewer.
multi-role/admin/designer/etc. were never affected (no case-collision).
2026-06-04 16:19:33 -04:00
Joseph Doherty 0f2b2b8351 feat(glauth): merged shared dev GLAuth directory + compose + runbook (10.100.0.35)
Phase 0 of the shared-GLAuth standardization. config.toml = merged dc=zb,dc=local
directory (15 groups in partitioned 55xx/56xx/57xx families, 14 users incl.
multi-role spanning all groups, serviceaccount search account). compose runs one
glauth/glauth:latest on :3893. README is the deploy/verify runbook. Code-reviewed;
fixed scp -r idempotency in the deploy command (README + plan Task 4).
2026-06-04 15:45:41 -04:00
Joseph Doherty 5be0cec601 docs(glauth): implementation plan + tasks for shared GLAuth standardization
19 tasks across 5 phases: author scadaproj/infra/glauth/ (merged config + compose +
runbook) → deploy/verify on 10.100.0.35 (hard gate, access-prerequisite) → repoint
ScadaBridge (Mac), un-stub OtOpcUa docker-dev, repoint windev MxGateway + OtOpcUa →
retire old glauths → full cross-app verification. Co-located .tasks.json.
2026-06-04 15:37:06 -04:00
Joseph Doherty 106fb8b149 docs(glauth): shared GLAuth standardization design (dev/test consolidation onto 10.100.0.35)
Approved design: consolidate OtOpcUa, MxAccessGateway, ScadaBridge dev/test auth
onto one shared GLAuth at 10.100.0.35:3893 (dc=zb,dc=local, plaintext). App-neutral
source of truth in scadaproj/infra/glauth/; merged directory with gid families
partitioned 55xx/56xx/57xx + multi-role/admin/serviceaccount; per-app Server
repoints; incremental rollout keeping old glauths until verified.
2026-06-04 15:26:32 -04:00
Joseph Doherty b0fe7b15ca fix(theme): render app-shell on desktop Chromium via ::details-content (0.2.1)
Chromium >=121 wraps a <details>'s content in a generated ::details-content
box with content-visibility:hidden while closed. The SSR app-shell ships
closed (no JS) and hides its summary toggle at lg+, so on desktop the rail+page
were invisible and the flex-lg-row layout collapsed to a vertical stack.

Add '.app-shell::details-content { display: contents }' inside the lg+ media
query: dissolving the wrapper box reveals the content regardless of open state
and restores rail/page as direct flex children of .app-shell. Browsers without
::details-content support drop the invalid selector and fall back to the legacy
force-show. Mobile (<lg) and nested NavRailSection disclosures unaffected.

Bump 0.2.0 -> 0.2.1.
2026-06-04 10:23:05 -04:00
Joseph Doherty 3070169e5d docs(ui-theme): record post-adoption site.css prune + reconfirm 0.2.0 on feed
Audit follow-up: the deferred 'dead .sidebar/.nav-link residual' was broader than
logged (OtOpcUa's site.css duplicated and overrode the whole kit shell). Pruned
across all 3 apps on chore/theme-css-prune branches (-167/-95/-106 lines, builds
clean). Note the remaining deferred items (kit layout.css calc review; ScadaBridge
Host transitive kit ref) and reconfirm the Theme 0.2.0 publish is genuine.
2026-06-03 04:38:24 -04:00
Joseph Doherty ea4116cc5b docs(ui-theme): mark merged to local default + pushed to origin (in sync) 2026-06-03 04:15:20 -04:00
Joseph Doherty ca21615090 docs(ui-theme): record 0.2.0 publish + adoption across all 3 apps (local feat branches) 2026-06-03 04:06:20 -04:00
Joseph Doherty a474eb6bd6 chore(theme): bump 0.1.0 -> 0.2.0 (nav persistence + ThemeScripts) 2026-06-03 02:59:27 -04:00
Joseph Doherty 9e4dedc987 fix(theme): guard nav-state.js against duplicate toggle listeners 2026-06-03 02:58:34 -04:00
Joseph Doherty 6aa2ee8095 fix(theme): null/whitespace-safe NavRailSection slug + edge tests 2026-06-03 02:57:07 -04:00
Joseph Doherty e2749b7d69 feat(theme): ThemeScripts + localStorage nav-state enhancer 2026-06-03 02:55:35 -04:00
Joseph Doherty edd49765d6 feat(theme): NavRailSection data-nav-key for persistence 2026-06-03 02:53:15 -04:00
Joseph Doherty 7e11f9aac8 docs(ui-theme): implementation plan + task graph (26 tasks, Phases 0-4) 2026-06-03 02:50:31 -04:00
Joseph Doherty e6e9dbfedb docs(ui-theme): approved adoption design (publish 0.2.0 + full canonical cutover across 3 apps) 2026-06-03 02:35:00 -04:00
Joseph Doherty 6d262f7d7c docs: Auth+Audit normalization PUSHED to origin (gitea) 2026-06-03 — default branches in sync; feat/* kept locally 2026-06-03 00:36:55 -04:00
Joseph Doherty 4b90ebb588 docs: reflect final delivery — Auth+Audit normalization merged to each repo's LOCAL default (main/master) 2026-06-03, NOT pushed (origin untouched), feat/* branches kept 2026-06-03 00:31:07 -04:00
Joseph Doherty 4de61d29f5 docs: PROGRAM COMPLETE — Auth+Audit normalization adopted across all 3 repos (Phases 0-3); mark exit-gate (CLAUDE.md Auth/Audit rows + components/{auth,audit}/GAPS.md adopted, local-only/not-pushed); tasks #10/#30/#31 done 2026-06-02 15:42:23 -04:00
Joseph Doherty 1ec057a32a plan: Task 2.5 (ScadaBridge audit full re-arch C1-C7) DONE+reviewed -> PHASE 2 COMPLETE (audit adopted across all 3 repos, deep/canonical, local-only). Next = Phase 3 Actor->principal wiring 2026-06-02 15:10:54 -04:00
Joseph Doherty a591a9fb47 plan(2.5): ScadaBridge audit C5 done+reviewed (central migration, MSSQL-verified); C6 subsumed (consumer surfaces already canonical via C3 shims); C7 (perf re-baseline + cleanup) in progress 2026-06-02 14:24:32 -04:00
Joseph Doherty e9100d0b74 plan(2.5): ScadaBridge audit C4 done+reviewed (site sidecar); C5 (central migration) in progress 2026-06-02 13:34:12 -04:00
Joseph Doherty 672ac5ff04 plan(2.5): ScadaBridge audit C3 done+reviewed (record swap keystone); C4 (site sidecar) in progress 2026-06-02 13:07:32 -04:00
Joseph Doherty f073241f52 plan(2.5): ScadaBridge audit re-arch C1+C2 done (reviewed); C3 (atomic record swap) in progress 2026-06-02 11:54:57 -04:00
Joseph Doherty 98e957903f plan(2.5): ScadaBridge audit full-rearch design + C1-C7 decomposition (sidecar forwarding, new-table-copy central migration, persisted computed cols, canonical record everywhere) 2026-06-02 10:36:00 -04:00
Joseph Doherty ca2a9ac507 plan(phase2): OtOpcUa 2.1/2.2 + MxGateway 2.3 DONE (deep audit adoption, spec+code reviewed, local-only); ScadaBridge 2.5 pending variant decision 2026-06-02 10:26:55 -04:00
Joseph Doherty abe06a2163 plan(phase2): Task 2.0 gate DONE — verified plan specs materially off (MxGw store moved to lib, OtOpcUa path dormant, SB rename structurally impossible); user chose DEEP adopt + pause; corrected deep design in -phase2-deep.md; PAUSED for review 2026-06-02 09:13:09 -04:00
Joseph Doherty 95681ac0b2 plan(phase1): Tasks 1.5/1.6/1.7 done+reviewed — PHASE 1 COMPLETE across all 3 repos (claims/cookies, dev base DN dc=zb, canonical-six roles + SB SoD collapse + config-DB migrations); next = Phase 2 audit 2026-06-02 08:15:46 -04:00
Joseph Doherty d73762bf76 plan(phase1): ScadaBridge re-arch C5 done+reviewed; Task 1.3 (ApiKeys adopt) COMPLETE across all 3 repos; installer/secret catch noted 2026-06-02 05:51:10 -04:00
Joseph Doherty 02a84b074a plan(phase1): ScadaBridge re-arch C4 done+reviewed (TransportExport excludes keys); C5 (retire entity) next 2026-06-02 05:17:09 -04:00
Joseph Doherty 9b5535ea47 plan(phase1): ScadaBridge re-arch C3 done+reviewed (CentralUI onto seam); C4 next 2026-06-02 04:50:09 -04:00
Joseph Doherty 406ede19dd plan(phase1): ScadaBridge re-arch C2 done+reviewed (mgmt+CLI onto seam); C3 next 2026-06-02 04:25:02 -04:00
Joseph Doherty ba7b38a654 plan(phase1): ScadaBridge re-arch C1 done+reviewed; 2 pre-existing Host.Tests baseline reds fixed; C2 next 2026-06-02 04:03:31 -04:00
Joseph Doherty e69e9c635b plan(phase1): ScadaBridge re-arch discovered architecture (CentralUI direct-repo + TransportExport) + C1-C5 decomposition + transport=exclude-keys 2026-06-02 03:22:19 -04:00
Joseph Doherty a4f9968917 plan(phase1): Auth lib 0.1.3 published (SetScopes/SetEnabled); ScadaBridge re-arch C mapping 2026-06-02 03:14:29 -04:00
Joseph Doherty 290e85cb38 test(auth.apikeys): store-level arg guards + SetEnabledAsync idempotence (review M1/M2) 2026-06-02 03:12:24 -04:00
Joseph Doherty 468959ca8a feat(auth.apikeys): add IApiKeyAdminStore.SetScopesAsync + SetEnabledAsync (editable scopes + reversible enable, no schema change); bump 0.1.3 2026-06-02 03:08:19 -04:00
Joseph Doherty 30c60f9d5f plan(phase1): SB ApiKeys A+B foundation done+reviewed; C/D/E pending 2026-06-02 02:50:57 -04:00
Joseph Doherty d30cdea487 plan(phase1): ScadaBridge ApiKeys full-adopt re-arch spec + sub-task decomposition 2026-06-02 02:29:03 -04:00
Joseph Doherty f2b73367d5 plan(phase1): MxGateway 1.3 done+approved (lib 0.1.2); ScadaBridge 1.3 pending 2026-06-02 02:14:45 -04:00
Joseph Doherty da669bfc9b fix(auth.apikeys): stamp schema version 2 to match donor gateway DBs; bump 0.1.2
The store was extracted from MxAccessGateway, whose deployed gateway-auth.db
is at schema_version=2. The library capped at 1 and threw on a newer on-disk
version -> gateway would fail to boot. Final schema is byte-identical since v1;
stamp 2 so existing deployed DBs interoperate (no key re-issuance). +2 tests.
2026-06-02 01:45:57 -04:00
Joseph Doherty 2d50d5dcf0 plan(phase1): 1.2/1.4 done across 3 repos (lib 0.1.1); remaining 1.3/1.5-1.7 2026-06-02 01:38:50 -04:00
Joseph Doherty aecc106657 fix(auth.ldap): skip LdapOptionsValidator when Enabled=false; bump 0.1.1
A disabled LDAP provider's connection fields are inert — don't require
Server/SearchBase/ServiceAccountDn at startup when Enabled=false. Surfaced
by the MxGateway 1.2 review (dashboard LDAP can be disabled). +1 test.
2026-06-02 01:17:53 -04:00
Joseph Doherty 0586e64f64 plan(phase1): record Task 1.2 review findings + LdapOptionsValidator 0.1.1 question 2026-06-02 01:12:20 -04:00
Joseph Doherty 37c03e5fc2 plan(phase1): note Roles sub-namespace; Task 1.1 done+approved (3 repos) 2026-06-02 00:34:13 -04:00
Joseph Doherty bea08f9673 plan(phase1): lock resolved decisions (SB ApiKeys full adopt, roles, dev hatches) 2026-06-02 00:25:53 -04:00
Joseph Doherty 32fd953969 plan(phase1): Task 1.0 exploration findings + elaborated Auth cutover
Per-app cutover steps mapped to the library surface; flags 5 findings that
change the plan (OtOpcUa section is Security:Ldap not Authentication:Ldap;
singleton 'bug' already mitigated; ScadaBridge inbound API keys are a
re-architecture not a reformat; OtOpcUa config+DB mapping + DevStubMode +
2nd LDAP consumer; MxGateway ApiKeys is the low-risk donor path).
2026-06-02 00:24:03 -04:00
Joseph Doherty c715565bd2 build(audit): add Gitea push.sh mirroring Auth's 2026-06-02 00:13:24 -04:00
Joseph Doherty f98fa84e4a plan: implementation plan + task graph for Auth+Audit normalization
Phase 0 command-exact (publish + feed-map); Phases 1-3 decomposed into
bite-sized cutover tasks with files-to-edit contracts, classification,
parallelizability, and per-phase explore/elaborate gates. Co-located
.tasks.json mirrors native tasks #7-#31.
2026-06-02 00:11:48 -04:00
Joseph Doherty 6ec1ea7d65 docs: design for full Auth+Audit normalization across 3 sister projects
Approved brainstorming output: two-library program (publish + adopt
ZB.MOM.WW.Auth then ZB.MOM.WW.Audit across OtOpcUa, MxAccessGateway,
ScadaBridge), library-major waterfall, ending with audit Actor wired
from the Auth principal. Local-only delivery; verified feed/source state.
2026-06-02 00:04:33 -04:00
Joseph Doherty c3ab37523a docs: record ZB.MOM.WW.Configuration fleet-wide adoption + add design/plan
Configuration is now adopted across all three sister apps (local branches),
so flip the status lines in CLAUDE.md, components/configuration/GAPS.md, and the
lib README/CLAUDE.md from 'not adopted' to adopted (also corrects 27->42 tests).
Adds the brainstorm design doc + bite-sized implementation plan (+tasks.json)
under docs/plans/ that drove the adoption.
2026-06-01 23:18:02 -04:00
Joseph Doherty 2f124fa02c docs(observability): record telemetry follow-ons DONE (metric normalization, ScadaBridge instruments, OTLP opt-in, site metrics listener, Serilog alignment) 2026-06-01 17:16:46 -04:00
Joseph Doherty 6c2a43a238 docs: plan for ZB.MOM.WW.Telemetry follow-ons (A additive/hygiene, B metric normalization, C ScadaBridge instruments, D OTLP opt-in) 2026-06-01 16:32:57 -04:00
Joseph Doherty dee55aadc6 docs(observability): record ZB.MOM.WW.Telemetry adoption across 3 apps; correct false MxGateway logging-status claim
All 3 apps adopted on branch feat/adopt-zb-telemetry (behaviour-preserving).
Records the per-repo result + accepted scope deviations (ScadaBridge keeps
LoggerConfigurationFactory + TraceContextEnricher instead of AddZbSerilog;
MxGateway keeps GatewayLogScope, exposes redaction via ILogRedactor seam) and
deferred follow-ons (#6 ms->s, #7 meter rename, #9 app instruments, OTLP, and
the new ScadaBridge Site-node HTTP/1.1 metrics-listener item). Corrects the
prior false 'MxGateway logging adopted on its own branch' claim — that migration
actually landed in this pass.
2026-06-01 15:58:10 -04:00
Joseph Doherty 30425726d4 docs: implementation plan for ZB.MOM.WW.Telemetry adoption across the 3 sister apps
13 tasks: Task 0 publishes/verifies the 2 nupkgs on Gitea (gates all); then 3
independent per-repo phases — OtOpcUa (1-3), ScadaBridge (4-6), MxGateway (7-11,
incl. the high-risk MEL->Serilog swap) — and Task 12 scadaproj bookkeeping last.
Records two behaviour-preserving refinements vs the design: ScadaBridge keeps
LoggerConfigurationFactory (+TraceContextEnricher) instead of AddZbSerilog, and
MxGateway keeps GatewayLogScope as-is. Breaking items #6/#7 deferred.
2026-06-01 15:24:28 -04:00
Joseph Doherty 3729ff2152 docs: design for ZB.MOM.WW.Telemetry adoption across the 3 sister apps
Second cross-fleet shared-library adoption (after Health). Full scope:
AddZbTelemetry (OTel Resource identity triple + standard instrumentation +
Prometheus /metrics) on all 3, plus shared Serilog on all 3 — including the
MxGateway MEL->Serilog migration. Records the correction that MxGateway's
logging was NOT actually adopted on main despite the docs' claim. Behaviour-
preserving bar; breaking items (#6 unit, #7 rename) deferred.
2026-06-01 15:11:50 -04:00
Joseph Doherty 19f7ea5eeb docs(health): record ZB.MOM.WW.Health adoption across 3 apps + deferrals + accepted /health/active startup behaviour change 2026-06-01 13:50:09 -04:00
Joseph Doherty 1e91784ba3 docs(health-plan): publish done; fix source-mapping (two patterns); note user-level creds 2026-06-01 13:23:46 -04:00
Joseph Doherty 5a965639f9 docs: implementation plan for ZB.MOM.WW.Health adoption across the 3 sister apps
Detailed task-by-task plan (publish to Gitea, then per-repo behaviour-preserving
probe swaps) incorporating recon findings that revised the design: MxGateway worker
IPC is named pipes (custom SQLite readiness probe instead of gRPC), ScadaBridge
ActorSystem is not in DI (transient bridge), downstream gRPC probes + IDbContextFactory
switch + ScadaBridge seam unification deferred.
2026-06-01 13:15:48 -04:00
Joseph Doherty f72403d6f0 docs: design for ZB.MOM.WW.Health adoption across the 3 sister apps
Plan to integrate the built-but-unadopted Health library into OtOpcUa,
MxAccessGateway, and ScadaBridge: Gitea-registry distribution, per-repo
behaviour-preserving probe swaps (preset-based), canonical tiers + writer,
MxGateway-first sequencing.
2026-06-01 13:01:36 -04:00
Joseph Doherty f47d4e1030 docs: remove upcoming.md (remaining normalization candidates won't be standardized) 2026-06-01 12:42:51 -04:00
Joseph Doherty 7ae25f8510 Re-stamp Telemetry-002/003 resolutions: nested redaction implemented in 05cc62a
Telemetry-002 was first resolved by documenting the scalar-only limitation; it is now
implemented (recursive nested redaction). Updated the two resolution notes to record
05cc62a and the replaced limitation test, preserving the audit trail. README unchanged
(still 0 pending / 35 total).
2026-06-01 12:13:05 -04:00
Joseph Doherty 05cc62aab3 Implement nested log redaction (Telemetry-002)
RedactionEnricher now projects each property into a mutable view the ILogRedactor
can edit: scalars stay as their CLR value, while StructureValue/SequenceValue/
DictionaryValue become nested IDictionary<string,object?>/IList<object?> the
redactor descends into recursively. A field nested inside a destructured {@Object}
can now be masked or removed — closing the gap documented as a limitation.

- Project/Rebuild round-trip preserves StructureValue.TypeTag and original
  dictionary keys; redactor-synthesised plain dicts/lists are rebuilt too.
- Untouched properties are not reallocated: structural ValueEquals skips write-back
  unless a property actually changed. Scalar fast path and no-redactor/no-property
  short-circuits retained.
- +5 nested-reach tests (mask/remove a field, sequence element, dictionary value,
  two-levels-deep); the old 'cannot reach' limitation test replaced. Serilog 34, 0 warnings.
- ILogRedactor XML doc + library README updated to document the recursive reach.
2026-06-01 12:12:26 -04:00
Joseph Doherty ae0ccc9a3a Mark all baseline code-review findings resolved
All 35 findings fixed in 544a6dd and marked Status: Resolved with resolution
notes. README regenerated: 0 pending / 35 total across 6 libraries.
2026-06-01 11:22:37 -04:00
Joseph Doherty 544a6ddb77 Fix all baseline code-review findings across the six shared libraries
Resolves the 35 findings from the 2026-06-01 baseline (commit 26ba1c7),
test-first for every behavioral change. +51 tests (331 -> 382 passing, 0 failed).

- Telemetry-001 (HIGH): RedactionEnricher now honours property removal, so a
  redactor that drops a key actually scrubs the secret from the event.
- Auth: LDAP validator ValidateOnStart; API-key verify no longer fails on a
  best-effort MarkUsed write or a corrupt scopes column (fail-closed); LDAP cert
  validation hook; KeyPrefix persistence aligned; README algorithm corrected.
- Health: Akka checks return Degraded (not throw) when the cluster isn't up yet;
  GrpcDependencyHealthCheck catch-all; null 'description' rendered; composite
  endpoint builder; XML docs shipped.
- Audit: CompositeAuditWriter no longer re-throws OperationCanceledException;
  TruncatingAuditRedactor over-redact scrubs Target + safe negative max; options
  record; XML docs shipped.
- Configuration: TryAddEnumerable idempotent registration; consistent port
  quoting; strict invariant port parsing; XML docs + README packaged.
- Theme: mobile toggle is now CSS-only (no Bootstrap JS); token/CSS hygiene;
  XML docs on the public parameter surface.

Shared-contract/spec docs updated where the code was the source of truth
(observability service.instance.id, MapZbMetrics, redactor reach). All changes
additive/back-compatible at v0.1.0. code-reviews bookkeeping follows separately.
2026-06-01 11:22:14 -04:00
Joseph Doherty 26ba1c7215 Baseline code review of the six ZB.MOM.WW.* shared libraries
All six libraries reviewed at commit 5f75cd4 against their components/ specs,
following code-reviews/REVIEW-PROCESS.md. 35 findings (0 Critical, 1 High,
9 Medium, 25 Low); none block adoption.

- Auth      0/0/3/3  (security core sound; startup-validation + key-verify contract gaps)
- Telemetry 0/1/2/5  (HIGH Telemetry-001: redactor 'remove' is a no-op -> secrets reach sinks)
- Health    0/0/2/4  (Akka checks throw instead of Degraded when cluster not yet up)
- Theme     0/0/1/5  (undocumented Bootstrap-collapse JS dep; token/CSS hygiene)
- Audit     0/0/1/4  (composite re-throws OCE vs never-throw writer contract)
- Configuration 0/0/0/4 (DI idempotency, port-parse strictness, packaging)

Cross-cutting: XML docs authored but GenerateDocumentationFile unset -> docs
not shipped in any nupkg (Auth/Health/Telemetry/Configuration/Audit).

README.md regenerated from the per-library findings; regen-readme.py --check passes.
2026-06-01 11:08:12 -04:00
Joseph Doherty 5f75cd4dab Add per-library code-review scaffolding for the ZB.MOM.WW.* shared libs
Adapts the code-reviews convention (process, README generator, template) from
the ScadaBridge app model (per-src/-module, Akka conventions) to scadaproj's
reality: six shared libraries reviewed against their components/ specs.

- REVIEW-PROCESS.md: review unit is a library; library->component-spec mapping;
  checklist re-targeted for reusable .NET libs (public API/semver, packaging &
  dependency hygiene, spec/shared-contract adherence) instead of actor/supervision.
- _template/findings.md: library/packages/component-spec/shared-contract header.
- regen-readme.py: per-library prose, data-driven Summary, '-' for unreviewed.
- Seed Auth/Theme/Health/Telemetry/Configuration/Audit findings stubs (0 findings).
- README.md generated; --check passes.
2026-06-01 10:46:16 -04:00
193 changed files with 153245 additions and 388 deletions
+106 -27
View File
@@ -6,12 +6,13 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
`scadaproj` is primarily an umbrella/index workspace that aggregates a family of `scadaproj` is primarily an umbrella/index workspace that aggregates a family of
related SCADA / OT / Wonderware / OPC UA "sister projects" that live as **sibling related SCADA / OT / Wonderware / OPC UA "sister projects" that live as **sibling
directories under `~/Desktop/`**. It now also **hosts five pieces of source itself** directories under `~/Desktop/`**. It now also **hosts six pieces of source itself**
the shared [`ZB.MOM.WW.Auth/`](ZB.MOM.WW.Auth/) library, the shared the shared [`ZB.MOM.WW.Auth/`](ZB.MOM.WW.Auth/) library, the shared
[`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/) UI kit, the shared [`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/) UI kit, the shared
[`ZB.MOM.WW.Health/`](ZB.MOM.WW.Health/) health-check library, the shared [`ZB.MOM.WW.Health/`](ZB.MOM.WW.Health/) health-check library, the shared
[`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/) observability library, and the shared [`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/) observability library, the shared
[`ZB.MOM.WW.Configuration/`](ZB.MOM.WW.Configuration/) config-validation library — all the realized output of their [`ZB.MOM.WW.Configuration/`](ZB.MOM.WW.Configuration/) config-validation library, and the new
[`ZB.MOM.WW.GalaxyRepository/`](ZB.MOM.WW.GalaxyRepository/) Galaxy browse library — all the realized output of their
respective component normalizations (see [Component normalization](#component-normalization)). respective component normalizations (see [Component normalization](#component-normalization)).
The point of this file is to give a high-level scan of each sister project — its purpose, The point of this file is to give a high-level scan of each sister project — its purpose,
location, stack, and primary commands — so a fresh Claude Code session can orient across location, stack, and primary commands — so a fresh Claude Code session can orient across
@@ -30,9 +31,10 @@ own `CLAUDE.md` for the full picture. See [Refreshing this index](#refreshing-th
| Project | Location | Stack | Repo | Summary | | Project | Location | Stack | Repo | Summary |
|---|---|---|---|---| |---|---|---|---|---|
| **OtOpcUa** | `~/Desktop/OtOpcUa` | .NET 10, OPC UA, gRPC | `gitea.dohertylan.com/dohertj2/lmxopcua` | OPC UA server that exposes AVEVA System Platform (Wonderware) Galaxy tags as an OPC UA address space. Galaxy access flows through an in-process `GalaxyDriver` → gRPC → the **mxaccessgw** gateway. | | **OtOpcUa** | `~/Desktop/OtOpcUa` | .NET 10, OPC UA, gRPC | `gitea.dohertylan.com/dohertj2/lmxopcua` | OPC UA server that exposes industrial data sources under a **unified Equipment-based address space** — native-protocol drivers (Modbus, S7, AB CIP/Legacy, TwinCAT, FOCAS, OpcUaClient) **and AVEVA System Platform (Wonderware) Galaxy, now a standard Equipment-kind driver** (the old SystemPlatform mirror / alias-tag model was retired ~2026-06-12). Galaxy access flows through the in-process `GalaxyDriver` → gRPC → the **mxaccessgw** gateway. Surfaces live read + authorized write, native OPC UA Part 9 alarms, and server-side HistoryRead. |
| **MxAccessGateway** (`mxaccessgw`) | `~/Desktop/MxAccessGateway` | .NET 10 gateway (x64) + .NET 4.8 worker (**x86**), gRPC | `gitea.dohertylan.com/dohertj2/mxaccessgw` | gRPC gateway giving modern clients full MXAccess parity without loading 32-bit COM. Two-process: gateway (ASP.NET Core gRPC + Blazor dashboard) + per-session x86 worker that owns the MXAccess COM STA. **OtOpcUa depends on this.** | | **MxAccessGateway** (`mxaccessgw`) | `~/Desktop/MxAccessGateway` | .NET 10 gateway (x64) + .NET 4.8 worker (**x86**), gRPC | `gitea.dohertylan.com/dohertj2/mxaccessgw` | gRPC gateway giving modern clients full MXAccess parity without loading 32-bit COM. Two-process: gateway (ASP.NET Core gRPC + Blazor dashboard) + per-session x86 worker that owns the MXAccess COM STA. **OtOpcUa depends on this.** |
| **ScadaBridge** | `~/Desktop/ScadaBridge` | .NET 10, Akka.NET, Docker | _git_ | Full implementation of the distributed SCADA platform — hub-and-spoke (1 central cluster + N site clusters). Projects prefixed `ZB.MOM.WW.ScadaBridge.*`; solution `ZB.MOM.WW.ScadaBridge.slnx`. Ships `src/`, `tests/`, `docker/` topology, and the design docs that are the spec. | | **ScadaBridge** | `~/Desktop/ScadaBridge` | .NET 10, Akka.NET, Docker | _git_ | Full implementation of the distributed SCADA platform — hub-and-spoke (1 central cluster + N site clusters). Projects prefixed `ZB.MOM.WW.ScadaBridge.*`; solution `ZB.MOM.WW.ScadaBridge.slnx`. Ships `src/`, `tests/`, `docker/` topology, and the design docs that are the spec. |
| **HistorianGateway** | `~/Desktop/HistorianGateway` | .NET 10 x64, gRPC, Blazor | `gitea.dohertylan.com/dohertj2/historiangw` | Single-process gRPC sidecar exposing (1) full read/write API to the AVEVA Historian (5 gRPC services; 15 retrieval modes; historical/backfill writes; tag-config lifecycle; SQL live-value path; store-forward + redundancy resilience; all default-disabled) and (2) read-only Galaxy object-hierarchy browse via the shared `ZB.MOM.WW.GalaxyRepository` lib (consumed as a Gitea-feed package). No COM, no x86 worker. **Dev:** two plaintext endpoints from `appsettings.Development.json` — dashboard on `:5220` (HTTP/1.1), gRPC h2c on `:5221`. **Production:** single `Kestrel:Endpoints:Https` endpoint with `Protocols: Http1AndHttp2` multiplexes dashboard + gRPC over one TLS port (ALPN); warn-only if no TLS endpoint configured (valid behind a reverse proxy / Kubernetes ingress; the warn predicate covers any non-Development environment, i.e. Production + Staging). In a non-Development environment the gateway also logs warn-only **production-readiness** checks (pending.md D2/D3) — relative runtime-artifact paths + secret hygiene (`ApiKeys:Mode=Disabled`, empty/dev-placeholder pepper, dev-placeholder LDAP password). Vendors `AVEVA.Historian.Client` from `histsdk`. Store-forward uses a crash-safe FasterLog append-only outbox (`Microsoft.FASTER.Core` 2.6.5; `CommitMode` PerEntry/Periodic), not SQLite. 744 tests total — 723 green on macOS; the env-gated live historian + Galaxy integration suite (21 tests) skips without a live server. |
## Cross-project relationships ## Cross-project relationships
@@ -84,8 +86,10 @@ the gateway uses `MxGateway.*`). The common subject is **AVEVA System Platform (
`GalaxyRepositoryClient` for the static hierarchy, and an MXAccess session `GalaxyRepositoryClient` for the static hierarchy, and an MXAccess session
(`MxCommand`/`MxEvent` protos) for live read/write/subscribe. A `DeployWatcher` polls the (`MxCommand`/`MxEvent` protos) for live read/write/subscribe. A `DeployWatcher` polls the
gateway's deploy-event signal to rebuild the OPC UA address space on Galaxy redeploy. gateway's deploy-event signal to rebuild the OPC UA address space on Galaxy redeploy.
OtOpcUa's job is purely a **protocol bridge**: it republishes Galaxy as an OPC UA address OtOpcUa's job is a **protocol bridge**: it republishes Galaxy — now bound as a *standard
space for *any* OPC UA client. Equipment-kind driver* alongside its native-protocol drivers, not a special SystemPlatform
mirror — as an OPC UA address space (live values, Part 9 alarms, HistoryRead) for *any* OPC
UA client.
- **ScadaBridge → OPC UA** (OPC UA client). ScadaBridge's DCL has an OPC UA adapter that - **ScadaBridge → OPC UA** (OPC UA client). ScadaBridge's DCL has an OPC UA adapter that
collects data and mirrors native OPC UA Alarms & Conditions. OtOpcUa is exactly such a collects data and mirrors native OPC UA Alarms & Conditions. OtOpcUa is exactly such a
server, so ScadaBridge can ingest Wonderware data **indirectly via OtOpcUa**. server, so ScadaBridge can ingest Wonderware data **indirectly via OtOpcUa**.
@@ -101,15 +105,21 @@ the gateway uses `MxGateway.*`). The common subject is **AVEVA System Platform (
- ScadaBridge has **two paths** to the same Wonderware data: (1) OPC UA → OtOpcUa → - ScadaBridge has **two paths** to the same Wonderware data: (1) OPC UA → OtOpcUa →
gateway, or (2) MxGateway adapter → gateway directly. Path 1 gives standards-based OPC UA gateway, or (2) MxGateway adapter → gateway directly. Path 1 gives standards-based OPC UA
decoupling; path 2 gives a more direct/native feed. decoupling; path 2 gives a more direct/native feed.
- **HistorianGateway is a new, independent sidecar** (no runtime coupling to the three above).
It reaches the Historian via its vendored gRPC client and the Galaxy Repository SQL DB directly,
not through `mxaccessgw`. It consumes the shared `ZB.MOM.WW.GalaxyRepository` lib
(consumed as a Gitea-feed `PackageReference`, no longer a cross-repo `ProjectReference`). Any client that needs Historian data or Galaxy browse can
target HistorianGateway independently; it is not a dependency of OtOpcUa or ScadaBridge today.
- Coupling is loose: each repo references the others only as **sibling context** (the - Coupling is loose: each repo references the others only as **sibling context** (the
`## Sister Projects` note in ScadaBridge's own `CLAUDE.md` lists `MxAccessGateway` and `## Sister Projects` note in ScadaBridge's own `CLAUDE.md` lists `MxAccessGateway` and
`OtOpcUa` with their Gitea URLs but states they are *not part of its solution*). `OtOpcUa` with their Gitea URLs but states they are *not part of its solution*).
- **The break surface is the wire contracts, not code.** Because coupling is by network - **The break surface is the wire contracts, not code.** Because coupling is by network
protocol, the things that break across repo boundaries are: the gateway's `.proto` files protocol, the things that break across repo boundaries are: the gateway's `.proto` files
(`mxaccess_gateway.proto`, `mxaccess_worker.proto`, `galaxy_repository.proto`), and the (`mxaccess_gateway.proto`, `mxaccess_worker.proto`, `galaxy_repository.proto`), the
OPC UA address-space shape OtOpcUa publishes (browse paths, node IDs, A&C alarm model). `historian_gateway.v1` proto (HistorianGateway's own contract), and the OPC UA address-space
Changes to any of these must be coordinated across the affected repos — a green build in shape OtOpcUa publishes (browse paths, node IDs, A&C alarm model). Changes to any of these
one repo does not prove the others still interoperate. must be coordinated across the affected repos — a green build in one repo does not prove the
others still interoperate.
## Component normalization ## Component normalization
@@ -120,12 +130,13 @@ each project's **code-verified current state**, and the **gaps** between. See
| Component | Status | Goal | Design | Implementation | | Component | Status | Goal | Design | Implementation |
|---|---|---|---|---| |---|---|---|---|---|
| Auth (login / identity / authz) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Auth` lib | [`components/auth/`](components/auth/) | [`ZB.MOM.WW.Auth/`](ZB.MOM.WW.Auth/) | | Auth (login / identity / authz) | Adopted (lib `0.1.3`; all 3 apps, merged to **local default** main/master + **pushed to origin** (gitea)) | Shared `ZB.MOM.WW.Auth` lib | [`components/auth/`](components/auth/) | [`ZB.MOM.WW.Auth/`](ZB.MOM.WW.Auth/) |
| UI Theme (layout / tokens / components) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Theme` RCL | [`components/ui-theme/`](components/ui-theme/) | [`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/) | | UI Theme (layout / tokens / components) | Adopted (lib `0.2.0`; all 3 apps, merged to **local default** + **pushed to origin** (gitea)) | Shared `ZB.MOM.WW.Theme` RCL | [`components/ui-theme/`](components/ui-theme/) | [`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/) |
| Health (readiness / liveness / active-node) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Health` lib | [`components/health/`](components/health/) | [`ZB.MOM.WW.Health/`](ZB.MOM.WW.Health/) | | Health (readiness / liveness / active-node) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Health` lib | [`components/health/`](components/health/) | [`ZB.MOM.WW.Health/`](ZB.MOM.WW.Health/) |
| Observability (metrics / traces / logs) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Telemetry` lib + `.Serilog` | [`components/observability/`](components/observability/) | [`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/) | | Observability (metrics / traces / logs) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Telemetry` lib + `.Serilog` | [`components/observability/`](components/observability/) | [`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/) |
| Config + validation (options / startup validation) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Configuration` lib | [`components/configuration/`](components/configuration/) | [`ZB.MOM.WW.Configuration/`](ZB.MOM.WW.Configuration/) | | Config + validation (options / startup validation) | Adopted (lib `0.1.0`; all 3 apps, local) | Shared `ZB.MOM.WW.Configuration` lib | [`components/configuration/`](components/configuration/) | [`ZB.MOM.WW.Configuration/`](ZB.MOM.WW.Configuration/) |
| Audit (event model + writer seam) | Built (lib `0.1.0`) | Shared `ZB.MOM.WW.Audit` lib | [`components/audit/`](components/audit/) | [`ZB.MOM.WW.Audit/`](ZB.MOM.WW.Audit/) | | Audit (event model + writer seam) | Adopted (lib `0.1.0`; all 3 apps, merged to **local default** main/master + **pushed to origin** (gitea)) | Shared `ZB.MOM.WW.Audit` lib | [`components/audit/`](components/audit/) | [`ZB.MOM.WW.Audit/`](ZB.MOM.WW.Audit/) |
| Galaxy Repository (object-hierarchy SQL browse + gRPC service) | Built (lib `0.1.0`, **published to the Gitea feed**; consumed by HistorianGateway as a feed `PackageReference`) | Shared `ZB.MOM.WW.GalaxyRepository` lib | _(design in histsdk + design doc 2026-06-23)_ | [`ZB.MOM.WW.GalaxyRepository/`](ZB.MOM.WW.GalaxyRepository/) |
The auth component is fully populated: a normalized [`spec`](components/auth/spec/SPEC.md), a The auth component is fully populated: a normalized [`spec`](components/auth/spec/SPEC.md), a
proposed [`shared-contract`](components/auth/shared-contract/ZB.MOM.WW.Auth.md), three proposed [`shared-contract`](components/auth/shared-contract/ZB.MOM.WW.Auth.md), three
@@ -137,7 +148,14 @@ The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Auth/`](ZB
(its own nested git repo; .NET 10; 4 packages — `Abstractions`, `Ldap`, `ApiKeys`, `AspNetCore`; (its own nested git repo; .NET 10; 4 packages — `Abstractions`, `Ldap`, `ApiKeys`, `AspNetCore`;
172 tests; `dotnet pack` → 4 nupkgs @ 0.1.0). The implementation plan is at 172 tests; `dotnet pack` → 4 nupkgs @ 0.1.0). The implementation plan is at
[`docs/plans/2026-06-01-zb-mom-ww-auth-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-auth-shared-library.md). [`docs/plans/2026-06-01-zb-mom-ww-auth-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-auth-shared-library.md).
**Not yet adopted** by the three apps — that's the follow-on tracked in [`components/auth/GAPS.md`](components/auth/GAPS.md) (#8). **Adopted across all three apps on 2026-06-02** (auth GAPS #1–#8) on each repo's `feat/adopt-zb-auth` branch —
committed + reviewed, then **fast-forward-merged into the repo's local default (main/master) and PUSHED to origin
(gitea) on 2026-06-03** (in sync; the `feat/*` branches kept locally as history). Cutover: shared `Auth.Ldap`,
`Auth.ApiKeys` (ScadaBridge inbound fully re-architected to the keyId/Bearer model), `IGroupRoleMapper<TRole>` seam,
`Transport`-enum config, canonical `ZbClaimTypes`/`ZbCookieDefaults`, unified dev base DN `dc=zb,dc=local`, and the
canonical-six role vocabulary (with ScadaBridge's accepted auditor/admin SoD collapse). Consumer pins: OtOpcUa `0.1.1`,
MxGateway `0.1.2`, ScadaBridge `0.1.3`. Per-repo detail in [`components/auth/GAPS.md`](components/auth/GAPS.md) +
`docs/plans/2026-06-02-auth-audit-normalization*.md`.
Build/test from `ZB.MOM.WW.Auth/`: `dotnet test`. Consumer matrix: OtOpcUa → Abstractions+Ldap+AspNetCore; Build/test from `ZB.MOM.WW.Auth/`: `dotnet test`. Consumer matrix: OtOpcUa → Abstractions+Ldap+AspNetCore;
MxAccessGateway & ScadaBridge → all four (ApiKeys not used by OtOpcUa). MxAccessGateway & ScadaBridge → all four (ApiKeys not used by OtOpcUa).
@@ -149,10 +167,18 @@ backlog. Shared = Technical-Light tokens + IBM Plex fonts + side-rail shell + wi
per-project = each app's `site.css` page layout, route content, scoped `.razor.css`. per-project = each app's `site.css` page layout, route content, scoped `.razor.css`.
The shared RCL is **built and lives in this repo** at [`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/) The shared RCL is **built and lives in this repo** at [`ZB.MOM.WW.Theme/`](ZB.MOM.WW.Theme/)
(.NET 10 Razor Class Library; single package; 32 bUnit tests; `dotnet pack` → 1 nupkg @ 0.1.0). (.NET 10 Razor Class Library; single package; 44 bUnit tests; `dotnet pack` → 1 nupkg @ 0.2.0,
The implementation plan is at **published to the Gitea feed**). The build plan is at
[`docs/plans/2026-06-01-zb-mom-ww-theme-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-theme-shared-library.md). [`docs/plans/2026-06-01-zb-mom-ww-theme-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-theme-shared-library.md);
**Not yet adopted** by the three apps — that's the follow-on tracked in [`components/ui-theme/GAPS.md`](components/ui-theme/GAPS.md). the adoption plan at [`docs/plans/2026-06-03-ui-theme-adoption.md`](docs/plans/2026-06-03-ui-theme-adoption.md).
**Adopted across all three apps on 2026-06-03** (full canonical cutover, SPEC §7) on each repo's
`feat/adopt-zb-theme` branch — committed + spec/code-reviewed, then **fast-forward-merged into each repo's local
default (master/main) and PUSHED to origin (gitea)** (in sync; `feat/*` kept locally as history): OtOpcUa
`lmxopcua` `master`@`11de14d`, ScadaBridge `main`@`58352a6`, MxGateway→`mxaccessgw` `main`@`73e54e2`. The `0.1.0 → 0.2.0` bump first promoted nav-expand persistence
into the kit (`NavRailSection.Key`/`data-nav-key` + a localStorage `nav-state.js` enhancer emitted by a new
`<ThemeScripts/>`), so all three apps share one persistence mechanism (OtOpcUa's bespoke cookie/JS-interop nav
island retired); MxGateway additionally gained a net-new Blazor `<LoginCard>` `/login` page over its existing
hardened endpoint. Per-app result in [`components/ui-theme/GAPS.md`](components/ui-theme/GAPS.md).
Build/test from `ZB.MOM.WW.Theme/`: `dotnet test`. Consumer matrix: all three apps consume Build/test from `ZB.MOM.WW.Theme/`: `dotnet test`. Consumer matrix: all three apps consume
the single `ZB.MOM.WW.Theme` package (OtOpcUa AdminUI, MxGateway Server, ScadaBridge Host + CentralUI). the single `ZB.MOM.WW.Theme` package (OtOpcUa AdminUI, MxGateway Server, ScadaBridge Host + CentralUI).
@@ -183,9 +209,14 @@ enrichers, and redaction policies.
The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/) The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Telemetry/`](ZB.MOM.WW.Telemetry/)
(.NET 10; 2 packages — `ZB.MOM.WW.Telemetry`, `ZB.MOM.WW.Telemetry.Serilog`; 19 tests; (.NET 10; 2 packages — `ZB.MOM.WW.Telemetry`, `ZB.MOM.WW.Telemetry.Serilog`; 19 tests;
`dotnet pack` → 2 nupkgs @ 0.1.0). **MxAccessGateway logging adopted** (MEL → Serilog migration done on `dotnet pack` → 2 nupkgs @ 0.1.0). **Adopted across all three apps on 2026-06-01** (branch
its own branch) — the one in-pass adoption. Broader OtOpcUa and ScadaBridge telemetry adoption is `feat/adopt-zb-telemetry` per repo, behaviour-preserving): `AddZbTelemetry` (Resource + standard
follow-on, tracked in [`components/observability/GAPS.md`](components/observability/GAPS.md). instrumentation + Prometheus `/metrics`) everywhere; OtOpcUa + MxGateway on `AddZbSerilog` (MxGateway's
MEL→Serilog migration + metrics export both landed in this pass — they were *not* actually done
beforehand despite an earlier claim); ScadaBridge keeps its `LoggerConfigurationFactory` (min-level
governance) and only adds the shared `TraceContextEnricher`. Deferred: MxGateway `ms`→`s` + Meter
rename, ScadaBridge app instruments + Site-node HTTP/1.1 metrics listener, OTLP wiring. Per-repo
result tracked in [`components/observability/GAPS.md`](components/observability/GAPS.md).
Build/test from `ZB.MOM.WW.Telemetry/`: `dotnet test`. Consumer matrix: all three apps consume both Build/test from `ZB.MOM.WW.Telemetry/`: `dotnet test`. Consumer matrix: all three apps consume both
packages after adoption (OtOpcUa, MxGateway Server, ScadaBridge Host + any instrumented project). packages after adoption (OtOpcUa, MxGateway Server, ScadaBridge Host + any instrumented project).
@@ -203,7 +234,12 @@ The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Configurat
(.NET 10; single package `ZB.MOM.WW.Configuration`; 27 tests; `dotnet pack` → 1 nupkg @ 0.1.0). (.NET 10; single package `ZB.MOM.WW.Configuration`; 27 tests; `dotnet pack` → 1 nupkg @ 0.1.0).
The implementation plan is at The implementation plan is at
[`docs/plans/2026-06-01-zb-mom-ww-configuration-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-configuration-shared-library.md). [`docs/plans/2026-06-01-zb-mom-ww-configuration-shared-library.md`](docs/plans/2026-06-01-zb-mom-ww-configuration-shared-library.md).
**Not yet adopted** by the three apps — that's the follow-on tracked in [`components/configuration/GAPS.md`](components/configuration/GAPS.md). **Adopted across all three apps on 2026-06-01** (OtOpcUa, MxAccessGateway, ScadaBridge) on each repo's
local default branch (`main`/`master`) — merged, **not yet pushed** to remotes; the package was first
published to the Gitea feed. Behaviour-preserving onto `OptionsValidatorBase`/`AddValidatedOptions`
for MxGateway + ScadaBridge (validator messages byte-identical), `StartupValidator`→`ConfigPreflight`
for ScadaBridge, and net-new `Ldap`/`OpcUa` validators for OtOpcUa. Per-app result tracked in
[`components/configuration/GAPS.md`](components/configuration/GAPS.md).
Build/test from `ZB.MOM.WW.Configuration/`: `dotnet test`. Consumer matrix: all three apps consume the Build/test from `ZB.MOM.WW.Configuration/`: `dotnet test`. Consumer matrix: all three apps consume the
single package; ScadaBridge is the heaviest adopter (per-module validators + `StartupValidator` single package; ScadaBridge is the heaviest adopter (per-module validators + `StartupValidator`
`ConfigPreflight`); OtOpcUa adoption is additive (it has no `IValidateOptions` usage today). `ConfigPreflight`); OtOpcUa adoption is additive (it has no `IValidateOptions` usage today).
@@ -221,10 +257,39 @@ principal. `IAuditRedactor` is aligned with Telemetry's `ILogRedactor` seam conv
The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Audit/`](ZB.MOM.WW.Audit/) The shared library is **built and lives in this repo** at [`ZB.MOM.WW.Audit/`](ZB.MOM.WW.Audit/)
(.NET 10; 1 package — `ZB.MOM.WW.Audit`; only non-BCL dependency `Microsoft.Extensions.DependencyInjection.Abstractions`; (.NET 10; 1 package — `ZB.MOM.WW.Audit`; only non-BCL dependency `Microsoft.Extensions.DependencyInjection.Abstractions`;
19 tests; `dotnet pack` → 1 nupkg @ 0.1.0). Repo: `https://gitea.dohertylan.com/dohertj2/zb-mom-ww-audit`. 19 tests; `dotnet pack` → 1 nupkg @ 0.1.0). Repo: `https://gitea.dohertylan.com/dohertj2/zb-mom-ww-audit`.
**Not yet adopted** by the three apps — that's the follow-on tracked in [`components/audit/GAPS.md`](components/audit/GAPS.md). **Adopted across all three apps on 2026-06-02** (audit GAPS #1–#6) on each repo's `feat/adopt-zb-audit` branch
(stacked on `feat/adopt-zb-auth`) — committed + reviewed, then **merged into the repo's local default (main/master)
and PUSHED to origin (gitea) on 2026-06-03** (in sync). Depth =
**DEEP adopt** (the canonical 9-field `AuditEvent` is the record everywhere; domain fields ride in `DetailsJson`).
OtOpcUa: canonical record + `AuditWriterActor : IAuditWriter` + `Outcome` column/migration + `ClusterAudit` fix.
MxGateway: new canonical SQLite `audit_event` store + `IAuditWriter` + `IApiKeyAuditStore`→canonical adapter.
**ScadaBridge: a full audit-subsystem re-architecture** (the program's largest task) — canonical record everywhere via a
deterministic codec; site SQLite split into `audit_event` + an `audit_forward_state` forwarding sidecar; central
partitioned `dbo.AuditLog` collapsed to 10 canonical cols + persisted computed cols (`CollapseAuditLogToCanonical`
migration, MSSQL-verified). Phase 3 wires `Actor` from the Auth principal at authenticated emit sites (per-app
`IAuditActorAccessor`). Per-repo detail in [`components/audit/GAPS.md`](components/audit/GAPS.md) +
`docs/plans/2026-06-02-auth-audit-normalization-phase2-deep.md` + `…-scadabridge-audit-rearch.md`.
Build/test from `ZB.MOM.WW.Audit/`: `dotnet test`. Consumer matrix: all three apps consume the single Build/test from `ZB.MOM.WW.Audit/`: `dotnet test`. Consumer matrix: all three apps consume the single
`ZB.MOM.WW.Audit` package (OtOpcUa, MxAccessGateway, ScadaBridge each map their own audit record/seam `ZB.MOM.WW.Audit` package (OtOpcUa, MxAccessGateway, ScadaBridge — DEEP-adopted as the canonical record).
onto the canonical type at the emit boundary).
The Galaxy Repository component normalizes the **Galaxy object-hierarchy SQL browse + reusable gRPC service**
that was previously embedded in `mxaccessgw`. Shared = canonical `galaxy_repository.v1` proto (wire-compatible
with `mxaccessgw`'s existing contract so OtOpcUa's `GalaxyRepositoryClient` is unaffected), the SQL browse
provider (`HierarchySql` / `AttributesSql` validated reverse-engineered queries), in-memory hierarchy cache +
snapshot + deploy-poll refresh `BackgroundService`, `GalaxyHierarchyProjector`, and `AddZbGalaxyRepository` /
`MapZbGalaxyRepository` DI extension. Left per-consumer = section path, subtree auth filtering, and any
app-specific paging defaults.
The shared library is **built and lives in this repo** at [`ZB.MOM.WW.GalaxyRepository/`](ZB.MOM.WW.GalaxyRepository/)
(.NET 10; single package `ZB.MOM.WW.GalaxyRepository`; `dotnet pack` → 1 nupkg @ 0.1.0, **published to
the Gitea NuGet feed** `gitea.dohertylan.com/api/packages/dohertj2/nuget`). The design doc is at
[`docs/plans/2026-06-23-historian-gateway-design.md`](docs/plans/2026-06-23-historian-gateway-design.md) (§10, component 1).
**Consumed by HistorianGateway** as a `PackageReference` from that Gitea feed, pinned at `0.1.0` (originally a
cross-repo `ProjectReference` to this scadaproj tree; switched to the feed package 2026-06-24).
**mxaccessgw adoption is a tracked follow-on** — once adopted, mxaccessgw's inline Galaxy browse code is replaced
by the shared lib (the `galaxy_repository.v1` wire contract is already identical, so OtOpcUa and ScadaBridge
clients are unaffected). Build/test from `ZB.MOM.WW.GalaxyRepository/`: `dotnet test`.
Consumer matrix: HistorianGateway (initial); mxaccessgw (follow-on adoption).
## Per-project primary commands ## Per-project primary commands
@@ -246,9 +311,23 @@ dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj
# ScadaBridge (~/Desktop/ScadaBridge) # ScadaBridge (~/Desktop/ScadaBridge)
dotnet build ZB.MOM.WW.ScadaBridge.slnx dotnet build ZB.MOM.WW.ScadaBridge.slnx
bash docker/deploy.sh # rebuild + redeploy the 8-node cluster bash docker/deploy.sh # rebuild + redeploy the 8-node cluster
cd infra && docker compose up -d # local test services (LDAP, SQL, OPC UA, SMTP, REST, Traefik) cd infra && docker compose up -d # local test services (SQL, OPC UA, SMTP, REST, Traefik) — LDAP is NOT here
# HistorianGateway (~/Desktop/HistorianGateway)
dotnet build ZB.MOM.WW.HistorianGateway.slnx
dotnet test ZB.MOM.WW.HistorianGateway.slnx # unit + golden; live integration tests skip without env vars
dotnet run --project src/ZB.MOM.WW.HistorianGateway.Server/ZB.MOM.WW.HistorianGateway.Server.csproj
# Dev: dashboard on :5220 (HTTP/1.1), gRPC h2c on :5221 (from appsettings.Development.json)
# Production: single Kestrel:Endpoints:Https with Protocols=Http1AndHttp2 (ALPN, one TLS port)
# Live integration (need HISTORIAN_GRPC_HOST + HISTORIAN_GRPC_WRITE_SANDBOX_TAG + GALAXY_SQL_CONNSTR set)
dotnet test ZB.MOM.WW.HistorianGateway.slnx --filter "Category=LiveIntegration"
``` ```
> **Shared GLAuth (all three apps + HistorianGateway):** LDAP auth for every local dev/test stack is provided by a
> single `zb-shared-glauth` container on the Linux fixture host **`10.100.0.35:3893`**
> (`baseDN dc=zb,dc=local`, Transport=None). Source of truth and deploy runbook:
> [`scadaproj/infra/glauth/`](infra/glauth/) (`config.toml` + `docker-compose.yml` + `README.md`).
## Refreshing this index ## Refreshing this index
This file is meant to be re-scanned when `scadaproj` is opened in Claude Code: This file is meant to be re-scanned when `scadaproj` is opened in Claude Code:
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# push.sh — pack and push the ZB.MOM.WW.Audit NuGet package to the Gitea feed.
#
# Steps: validate the two required environment variables, run `dotnet pack` in
# Release configuration into ./artifacts, then push every .nupkg found there.
#
# Required environment variables:
# GITEA_NUGET_SOURCE — full URL of the Gitea NuGet feed
# e.g. https://gitea.dohertylan.com/api/packages/dohertj2/nuget/index.json
# GITEA_NUGET_KEY — Gitea access token with package:write permission
#
# Usage:
# export GITEA_NUGET_SOURCE="https://gitea.dohertylan.com/api/packages/dohertj2/nuget/index.json"
# export GITEA_NUGET_KEY="your-gitea-token"
# ./build/push.sh
#
# NOTE(review): `dotnet pack` and ./artifacts are resolved against the current
# working directory, not the script's location. The usage above suggests the
# script is meant to be run from the library's repo root — confirm.

# Abort on any failing command (-e), on use of an unset variable (-u), and when
# any stage of a pipeline fails (pipefail).
set -euo pipefail
# `${VAR:?message}` aborts the script with the given message when VAR is unset
# or empty; the leading `:` no-op only exists to trigger that expansion.
: "${GITEA_NUGET_SOURCE:?set GITEA_NUGET_SOURCE to your Gitea NuGet feed URL}"
: "${GITEA_NUGET_KEY:?set GITEA_NUGET_KEY to your Gitea access token}"
# Build the package(s) in Release configuration into ./artifacts.
dotnet pack -c Release -o ./artifacts
# The glob is quoted, so it reaches `dotnet nuget push` literally rather than
# being expanded by the shell. --skip-duplicate keeps a re-run against an
# already-published version from being treated as a failure.
dotnet nuget push "./artifacts/*.nupkg" \
--source "$GITEA_NUGET_SOURCE" \
--api-key "$GITEA_NUGET_KEY" \
--skip-duplicate
@@ -1,8 +1,9 @@
namespace ZB.MOM.WW.Audit; namespace ZB.MOM.WW.Audit;
/// <summary>Fans an event out to several writers. Best-effort: a failing writer does not stop the others.</summary> /// <summary>Fans an event out to several writers. Best-effort: a failing writer does not stop the others.</summary>
/// <remarks>A failing writer's exception is swallowed so the fan-out drains and the caller is never /// <remarks>Every inner-writer failure is swallowed — including <see cref="OperationCanceledException"/>
/// aborted — but <see cref="OperationCanceledException"/> is re-thrown so cancellation is honored.</remarks> /// — so the fan-out drains and the caller is never aborted, honoring the <see cref="IAuditWriter"/>
/// "must not throw to the caller" contract even when a request-scoped cancellation token is passed.</remarks>
public sealed class CompositeAuditWriter : IAuditWriter public sealed class CompositeAuditWriter : IAuditWriter
{ {
private readonly IReadOnlyList<IAuditWriter> _inner; private readonly IReadOnlyList<IAuditWriter> _inner;
@@ -21,8 +22,7 @@ public sealed class CompositeAuditWriter : IAuditWriter
foreach (var writer in _inner) foreach (var writer in _inner)
{ {
try { await writer.WriteAsync(evt, ct).ConfigureAwait(false); } try { await writer.WriteAsync(evt, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { throw; } // honor cancellation; do not swallow catch { /* best-effort seam: a failing writer (incl. cancellation) must not stop the others or the caller */ }
catch { /* best-effort seam: a failing writer must not stop the others or the caller */ }
} }
} }
} }
@@ -2,8 +2,9 @@ namespace ZB.MOM.WW.Audit;
/// <summary> /// <summary>
/// Redactor that caps oversized <see cref="AuditEvent.DetailsJson"/> and <see cref="AuditEvent.Target"/>. /// Redactor that caps oversized <see cref="AuditEvent.DetailsJson"/> and <see cref="AuditEvent.Target"/>.
/// Never throws — over-redacts (drops DetailsJson) on internal failure. The secret-field policy /// Never throws — over-redacts (drops both DetailsJson and Target) on internal failure. The
/// (which fields are sensitive) stays per-project; compose this with a project redactor as needed. /// secret-field policy (which fields are sensitive) stays per-project; compose this with a project
/// redactor as needed.
/// </summary> /// </summary>
public sealed class TruncatingAuditRedactor : IAuditRedactor public sealed class TruncatingAuditRedactor : IAuditRedactor
{ {
@@ -26,13 +27,15 @@ public sealed class TruncatingAuditRedactor : IAuditRedactor
} }
catch catch
{ {
// Hard contract: never throw. Over-redact on internal failure. // Hard contract: never throw, and over-redact to a STRICTLY safer event on internal
return rawEvent with { DetailsJson = null }; // failure — scrub every field this redactor owns (both DetailsJson and Target).
return rawEvent with { DetailsJson = null, Target = null };
} }
} }
private string? Truncate(string? value, int max) private string? Truncate(string? value, int max)
{ {
if (max < 0) max = 0; // clamp nonsensical negative caps so a config bug fails safe, not throws
if (value is null || value.Length <= max) return value; if (value is null || value.Length <= max) return value;
var marker = _options.TruncationMarker; var marker = _options.TruncationMarker;
if (marker.Length >= max) return marker[..max]; if (marker.Length >= max) return marker[..max];
@@ -1,12 +1,12 @@
namespace ZB.MOM.WW.Audit; namespace ZB.MOM.WW.Audit;
/// <summary>Caps for <see cref="TruncatingAuditRedactor"/>.</summary> /// <summary>Immutable caps for <see cref="TruncatingAuditRedactor"/>.</summary>
public sealed class TruncatingAuditRedactorOptions public sealed record TruncatingAuditRedactorOptions
{ {
/// <summary>Max length of <see cref="AuditEvent.DetailsJson"/> before truncation. Default 4096.</summary> /// <summary>Max length of <see cref="AuditEvent.DetailsJson"/> before truncation. Default 4096.</summary>
public int MaxDetailsJsonLength { get; set; } = 4096; public int MaxDetailsJsonLength { get; init; } = 4096;
/// <summary>Max length of <see cref="AuditEvent.Target"/> before truncation. Default 512.</summary> /// <summary>Max length of <see cref="AuditEvent.Target"/> before truncation. Default 512.</summary>
public int MaxTargetLength { get; set; } = 512; public int MaxTargetLength { get; init; } = 512;
/// <summary>Marker appended to a truncated value. Default "…[truncated]".</summary> /// <summary>Marker appended to a truncated value. Default "…[truncated]".</summary>
public string TruncationMarker { get; set; } = "…[truncated]"; public string TruncationMarker { get; init; } = "…[truncated]";
} }
@@ -3,6 +3,8 @@
<TargetFramework>net10.0</TargetFramework> <TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<!-- Emit and pack XML docs so consumers get IntelliSense/tooltip documentation. -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup> </PropertyGroup>
<PropertyGroup> <PropertyGroup>
<IsPackable>true</IsPackable> <IsPackable>true</IsPackable>
@@ -38,11 +38,32 @@ public class CompositeAuditWriterTests
} }
[Fact] [Fact]
public async Task Cancellation_is_propagated_not_swallowed() public async Task Cancellation_does_not_surface_to_the_caller()
{ {
// OperationCanceledException is re-thrown (unlike ordinary writer failures, which are swallowed). // Per the IAuditWriter hard contract ("must not throw to the caller"), an
// OperationCanceledException from an inner writer is swallowed like any other failure —
// it must NOT abort the user-facing action that produced the event.
var after = new RecordingWriter(); var after = new RecordingWriter();
var sut = new CompositeAuditWriter(new IAuditWriter[] { new CancellingWriter(), after }); var sut = new CompositeAuditWriter(new IAuditWriter[] { new CancellingWriter(), after });
await Assert.ThrowsAsync<OperationCanceledException>(() => sut.WriteAsync(Evt())); await sut.WriteAsync(Evt()); // must not throw
Assert.Equal(1, after.Count); // drain continues past the cancelled writer
}
[Fact]
public async Task Empty_writer_list_is_a_no_op()
{
    // With zero inner writers the fan-out loop has nothing to visit; the call must just complete.
    IAuditWriter[] noWriters = Array.Empty<IAuditWriter>();
    var composite = new CompositeAuditWriter(noWriters);

    await composite.WriteAsync(Evt()); // must not throw
}
[Fact]
public async Task Null_writer_entry_is_swallowed_and_does_not_stop_the_others()
{
// A null inner writer faults the await; the best-effort seam swallows it (like any
// other writer failure) and continues draining the remaining writers.
var after = new RecordingWriter();
var sut = new CompositeAuditWriter(new IAuditWriter?[] { null, after }!);
await sut.WriteAsync(Evt()); // must not throw
Assert.Equal(1, after.Count);
} }
} }
@@ -53,4 +53,30 @@ public class TruncatingAuditRedactorTests
var result = r.Apply(Evt(new string('x', 20))); var result = r.Apply(Evt(new string('x', 20)));
Assert.Equal(3, result.DetailsJson!.Length); Assert.Equal(3, result.DetailsJson!.Length);
} }
[Fact]
public void Negative_max_is_treated_as_zero_and_does_not_throw()
{
    // Negative caps are a misconfiguration. Truncate clamps them to 0 instead of throwing, and
    // with an empty marker the capped value comes out as the empty string.
    var redactor = new TruncatingAuditRedactor(new TruncatingAuditRedactorOptions
    {
        MaxDetailsJsonLength = -5,
        MaxTargetLength = -1,
        TruncationMarker = "",
    });
    var input = Evt(new string('x', 20), target: new string('y', 20));

    var redacted = redactor.Apply(input);

    Assert.Equal(string.Empty, redacted.DetailsJson);
    Assert.Equal(string.Empty, redacted.Target);
}
[Fact]
public void Over_redact_fallback_scrubs_both_details_and_target_without_throwing()
{
    // Push the real redactor into its catch branch through a reachable misconfiguration:
    // an over-long DetailsJson forces Truncate to read the marker, and a null marker faults there.
    // The fallback has to be strictly safer than the input: DetailsJson AND Target both null,
    // and nothing escapes to the caller.
    var redactor = new TruncatingAuditRedactor(new TruncatingAuditRedactorOptions
    {
        MaxDetailsJsonLength = 5,
        TruncationMarker = null!,
    });
    var input = Evt(new string('x', 50), target: "sensitive target");

    var redacted = redactor.Apply(input);

    Assert.Null(redacted.DetailsJson);
    Assert.Null(redacted.Target);
}
} }
+1 -1
View File
@@ -5,7 +5,7 @@
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
<Version>0.1.0</Version> <Version>0.1.3</Version>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally> <ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup> </PropertyGroup>
+2 -2
View File
@@ -10,8 +10,8 @@ Authentication and authorisation libraries for the **ZB.MOM.WW SCADA family** (O
|---|---|---| |---|---|---|
| `ZB.MOM.WW.Auth.Abstractions` | Auth contracts, canonical role constants, and shared types (`LdapOptions`, `LdapAuthResult`, `ILdapAuthService`, `IApiKeyStore`). No runtime dependencies beyond the BCL. | — | | `ZB.MOM.WW.Auth.Abstractions` | Auth contracts, canonical role constants, and shared types (`LdapOptions`, `LdapAuthResult`, `ILdapAuthService`, `IApiKeyStore`). No runtime dependencies beyond the BCL. | — |
| `ZB.MOM.WW.Auth.Ldap` | LDAP authentication service: bind-then-search-then-bind against GLAuth or Active Directory; RFC 4514-aware group extraction; fail-closed. | `Abstractions`, `Novell.Directory.Ldap.NETStandard` | | `ZB.MOM.WW.Auth.Ldap` | LDAP authentication service: bind-then-search-then-bind against GLAuth or Active Directory; RFC 4514-aware group extraction; fail-closed. | `Abstractions`, `Novell.Directory.Ldap.NETStandard` |
| `ZB.MOM.WW.Auth.ApiKeys` | SQLite-backed API-key store with pepper-based PBKDF2 hashing, rotation, and audit log. Includes a `MigrationHostedService` that runs schema migrations on startup. | `Abstractions`, `Microsoft.Data.Sqlite` | | `ZB.MOM.WW.Auth.ApiKeys` | SQLite-backed API-key store with **pepper-keyed HMAC-SHA256** secret hashing, rotation, and audit log. DI wiring is `AddZbApiKeyAuth`; an opt-in `MigrationHostedService` runs schema migrations on startup. | `Abstractions`, `Microsoft.Data.Sqlite` |
| `ZB.MOM.WW.Auth.AspNetCore` | ASP.NET Core DI helpers (`AddZbAuth`), cookie defaults, claim-type constants, and `LdapOptionsValidator` registration. Wires together Ldap + ApiKeys + cookie middleware. | `Abstractions`, `Ldap`, `ApiKeys`, `Microsoft.AspNetCore.App` | | `ZB.MOM.WW.Auth.AspNetCore` | ASP.NET Core wiring for the **LDAP** provider only: `AddZbLdapAuth` (binds + start-time-validates `LdapOptions`, registers `ILdapAuthService`), plus `ZbCookieDefaults.Apply` (hardened cookie helper the consumer calls itself) and `ZbClaimTypes` constants. It does **not** wire API keys or cookie middleware — API-key DI is `AddZbApiKeyAuth` in the `ApiKeys` package. | `Abstractions`, `Ldap`, `Microsoft.AspNetCore.App` |
--- ---
@@ -55,6 +55,12 @@ public interface IApiKeyAdminStore
Task<bool> RotateAsync(string keyId, byte[] newSecretHash, CancellationToken ct); Task<bool> RotateAsync(string keyId, byte[] newSecretHash, CancellationToken ct);
Task<bool> DeleteAsync(string keyId, CancellationToken ct); Task<bool> DeleteAsync(string keyId, CancellationToken ct);
/// <summary>Replaces the scope set on an existing key. Does not touch the secret. Returns false if the key does not exist.</summary>
/// <param name="keyId">Identifier of the key whose scopes are replaced.</param>
/// <param name="scopes">The complete new scope set; it replaces (does not merge with) the stored one.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns><see langword="true"/> when the key existed and was updated; otherwise <see langword="false"/>.</returns>
Task<bool> SetScopesAsync(string keyId, IReadOnlySet<string> scopes, CancellationToken ct);
/// <summary>Enables (clears revoked_utc) or disables (sets revoked_utc) a key WITHOUT changing its secret. Returns false if the key does not exist.</summary>
/// <param name="keyId">Identifier of the key to toggle.</param>
/// <param name="enabled"><see langword="true"/> to enable the key; <see langword="false"/> to disable it.</param>
/// <param name="whenUtc">Timestamp stored as revoked_utc when disabling. The SQLite store does not use it when enabling.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns><see langword="true"/> when the key existed and was updated; otherwise <see langword="false"/>.</returns>
Task<bool> SetEnabledAsync(string keyId, bool enabled, DateTimeOffset whenUtc, CancellationToken ct);
/// <summary> /// <summary>
/// Enumerates all API keys as hash-free <see cref="ApiKeyListItem"/> projections, newest first. /// Enumerates all API keys as hash-free <see cref="ApiKeyListItem"/> projections, newest first.
/// The secret hash is never selected, so callers cannot use this to recover secret material. /// The secret hash is never selected, so callers cannot use this to recover secret material.
@@ -1,3 +1,5 @@
using System.Net.Security;
namespace ZB.MOM.WW.Auth.Abstractions.Ldap; namespace ZB.MOM.WW.Auth.Abstractions.Ldap;
public enum LdapTransport { Ldaps, StartTls, None } public enum LdapTransport { Ldaps, StartTls, None }
@@ -16,6 +18,16 @@ public sealed record LdapOptions
public string DisplayNameAttribute { get; init; } = "cn"; public string DisplayNameAttribute { get; init; } = "cn";
public string GroupAttribute { get; init; } = "memberOf"; public string GroupAttribute { get; init; } = "memberOf";
public int ConnectionTimeoutMs { get; init; } = 10_000; public int ConnectionTimeoutMs { get; init; } = 10_000;
/// <summary>
/// Optional hook to harden (or, in dev, relax) TLS server-certificate validation for the
/// <see cref="LdapTransport.Ldaps"/> / <see cref="LdapTransport.StartTls"/> transports. When
/// <see langword="null"/> (the default) and <see cref="AllowInsecure"/> is not set, the LDAP client
/// validates the server certificate against the OS trust store — it does <em>not</em> blind-accept.
/// Supply a callback to pin a CA, validate the SAN against <see cref="Server"/>, or otherwise
/// tighten validation. This is a code-only seam (not bound from configuration) and takes
/// precedence over <see cref="AllowInsecure"/>.
/// </summary>
/// <remarks>
/// With no callback supplied, setting <see cref="AllowInsecure"/> makes the production LDAP
/// connection accept any server certificate (dev/test only); a supplied callback always wins.
/// </remarks>
public RemoteCertificateValidationCallback? ServerCertificateValidationCallback { get; init; }
} }
public enum LdapAuthFailure { BadCredentials, UserNotFound, AmbiguousUser, GroupLookupFailed, ServiceAccountBindFailed, Disabled } public enum LdapAuthFailure { BadCredentials, UserNotFound, AmbiguousUser, GroupLookupFailed, ServiceAccountBindFailed, Disabled }
@@ -101,7 +101,10 @@ public sealed class ApiKeyAdminCommands
var record = new ApiKeyRecord( var record = new ApiKeyRecord(
KeyId: keyId, KeyId: keyId,
KeyPrefix: $"{_options.TokenPrefix}_{keyId}", // KeyPrefix is the bare token prefix (e.g. "mxgw"), NOT prefix_keyId — the key id is
// already its own column. Embedding it here produced a self-referential value that
// confused admin tooling and disagreed with the read/test paths (see Auth-005).
KeyPrefix: _options.TokenPrefix,
SecretHash: secretHash, SecretHash: secretHash,
DisplayName: displayName, DisplayName: displayName,
Scopes: scopes, Scopes: scopes,
@@ -184,6 +187,53 @@ public sealed class ApiKeyAdminCommands
return new KeyActionResult(deleted, status); return new KeyActionResult(deleted, status);
} }
/// <summary>
/// set-scopes: swaps the scope set of an existing key for <paramref name="scopes"/> while leaving
/// its secret alone, then appends a <c>set-scopes</c> audit entry. The audit details carry only
/// the number of scopes, never the scope values. Every attempt is audited — including a miss on
/// an unknown key — so the security trail stays complete.
/// </summary>
/// <param name="keyId">Identifier of the key to update; must not be null or whitespace.</param>
/// <param name="scopes">The replacement scope set; must not be null.</param>
/// <param name="remoteAddress">Caller address forwarded to the audit entry; may be null.</param>
/// <param name="ct">Token used to cancel the store and audit calls.</param>
/// <returns>
/// A <see cref="KeyActionResult"/> built from the store's update flag and the status text
/// (<c>scopes-set</c> or <c>not-found</c>).
/// </returns>
public async Task<KeyActionResult> SetScopesAsync(
    string keyId, IReadOnlySet<string> scopes, string? remoteAddress, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(keyId);
    ArgumentNullException.ThrowIfNull(scopes);

    var found = await _adminStore.SetScopesAsync(keyId, scopes, ct).ConfigureAwait(false);
    var outcome = found ? "scopes-set" : "not-found";

    // Only the scope count goes into the audit trail; the scope names would leak authority detail.
    var auditDetails = $"{outcome}; count={scopes.Count}";
    await AppendAuditAsync(keyId, "set-scopes", remoteAddress, auditDetails, ct).ConfigureAwait(false);

    return new KeyActionResult(found, outcome);
}
/// <summary>
/// enable-key / disable-key: flips a key between active and disabled without changing its
/// secret, then appends an <c>enable-key</c> or <c>disable-key</c> audit entry to match the
/// requested direction. Every attempt is audited — including a miss on an unknown key — so the
/// security trail stays complete.
/// </summary>
/// <param name="keyId">Identifier of the key to toggle; must not be null or whitespace.</param>
/// <param name="enabled"><see langword="true"/> to enable the key; <see langword="false"/> to disable it.</param>
/// <param name="remoteAddress">Caller address forwarded to the audit entry; may be null.</param>
/// <param name="ct">Token used to cancel the store and audit calls.</param>
/// <returns>
/// A <see cref="KeyActionResult"/> built from the store's update flag and the status text
/// (<c>enabled</c>, <c>disabled</c>, or <c>not-found</c>).
/// </returns>
public async Task<KeyActionResult> SetEnabledAsync(
    string keyId, bool enabled, string? remoteAddress, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(keyId);

    // The audit event name and the success status both follow the requested direction.
    var (eventType, successStatus) = enabled
        ? ("enable-key", "enabled")
        : ("disable-key", "disabled");

    DateTimeOffset timestamp = _clock.GetUtcNow();
    var found = await _adminStore.SetEnabledAsync(keyId, enabled, timestamp, ct).ConfigureAwait(false);
    var outcome = found ? successStatus : "not-found";

    await AppendAuditAsync(keyId, eventType, remoteAddress, outcome, ct).ConfigureAwait(false);
    return new KeyActionResult(found, outcome);
}
private string RequirePepper() private string RequirePepper()
{ {
string? pepper = _pepperProvider.GetPepper(); string? pepper = _pepperProvider.GetPepper();
@@ -62,8 +62,24 @@ public sealed class ApiKeyVerifier(
return Fail(ApiKeyFailure.SecretMismatch); return Fail(ApiKeyFailure.SecretMismatch);
} }
// 6. Record successful use, then return the identity (no secret/hash/pepper included). // 6. The authentication decision is already made (line 60). Recording last-used is
await store.MarkUsedAsync(record.KeyId, _timeProvider.GetUtcNow(), ct).ConfigureAwait(false); // best-effort bookkeeping: a transient storage hiccup (SQLITE_BUSY past the busy-timeout,
// disk full, DB locked by a migration) must NOT turn an otherwise-valid credential into a
// failed auth. Swallow any non-cancellation failure so the only exception path remains
// cancellation, as the class contract promises. Cancellation is honoured (re-thrown).
try
{
await store.MarkUsedAsync(record.KeyId, _timeProvider.GetUtcNow(), ct).ConfigureAwait(false);
}
catch (OperationCanceledException)
{
throw;
}
catch
{
// Best-effort: the last-used write failed, but the credential is valid. Fail open on the
// bookkeeping (not the auth decision) rather than denying a legitimate caller.
}
return new ApiKeyVerification( return new ApiKeyVerification(
Succeeded: true, Succeeded: true,
@@ -20,7 +20,12 @@ public static class ScopeSerializer
/// <summary>Deserializes scopes from a JSON array string.</summary> /// <summary>Deserializes scopes from a JSON array string.</summary>
/// <param name="value">The JSON string to deserialize; may be null or empty.</param> /// <param name="value">The JSON string to deserialize; may be null or empty.</param>
/// <returns>An ordinal-compared set of scopes; empty when the input is null/blank.</returns> /// <returns>
/// An ordinal-compared set of scopes; empty when the input is null/blank. A malformed or
/// non-array column (operator tampering, a partial write, a format change, or a buggy writer)
/// fails closed to an EMPTY set rather than throwing, so a single poisoned row degrades to a
/// zero-scope identity on the auth path instead of an unhandled <see cref="JsonException"/>.
/// </returns>
public static IReadOnlySet<string> Deserialize(string? value) public static IReadOnlySet<string> Deserialize(string? value)
{ {
if (string.IsNullOrWhiteSpace(value)) if (string.IsNullOrWhiteSpace(value))
@@ -28,7 +33,18 @@ public static class ScopeSerializer
return new HashSet<string>(StringComparer.Ordinal); return new HashSet<string>(StringComparer.Ordinal);
} }
string[]? scopes = JsonSerializer.Deserialize<string[]>(value); string[]? scopes;
try
{
scopes = JsonSerializer.Deserialize<string[]>(value);
}
catch (JsonException)
{
// Fail closed: a corrupt scopes column yields no scopes rather than an exception on the
// verification hot path. The verifier's "only exception path is cancellation" contract
// is preserved, and a key with an unreadable scope set is left with zero authority.
return new HashSet<string>(StringComparer.Ordinal);
}
return new HashSet<string>(scopes ?? [], StringComparer.Ordinal); return new HashSet<string>(scopes ?? [], StringComparer.Ordinal);
} }
@@ -4,7 +4,8 @@ using ZB.MOM.WW.Auth.Abstractions.ApiKeys;
namespace ZB.MOM.WW.Auth.ApiKeys.Sqlite; namespace ZB.MOM.WW.Auth.ApiKeys.Sqlite;
/// <summary> /// <summary>
/// SQLite-backed administration store for API keys (create, revoke, rotate, delete). /// SQLite-backed administration store for API keys (create, revoke, rotate, delete,
/// set-scopes, enable/disable).
/// </summary> /// </summary>
public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectionFactory) : IApiKeyAdminStore public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectionFactory) : IApiKeyAdminStore
{ {
@@ -85,6 +86,67 @@ public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectio
return rows > 0; return rows > 0;
} }
/// <inheritdoc />
public async Task<bool> SetScopesAsync(string keyId, IReadOnlySet<string> scopes, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(keyId);
    ArgumentNullException.ThrowIfNull(scopes);

    // Overwrites only the scopes column of the matching row.
    const string Sql = """
        UPDATE api_keys
        SET scopes = $scopes
        WHERE key_id = $key_id;
        """;

    await using var connection = await connectionFactory.OpenConnectionAsync(ct).ConfigureAwait(false);
    await using var command = connection.CreateCommand();
    command.CommandText = Sql;
    command.Parameters.AddWithValue("$key_id", keyId);
    command.Parameters.AddWithValue("$scopes", ScopeSerializer.Serialize(scopes));

    // Zero affected rows means no key with that id exists.
    return await command.ExecuteNonQueryAsync(ct).ConfigureAwait(false) > 0;
}
/// <inheritdoc />
public async Task<bool> SetEnabledAsync(string keyId, bool enabled, DateTimeOffset whenUtc, CancellationToken ct)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(keyId);

    await using var connection = await connectionFactory.OpenConnectionAsync(ct).ConfigureAwait(false);
    await using var command = connection.CreateCommand();

    // Reversible toggle: neither statement carries a `revoked_utc IS NULL` guard (unlike
    // RevokeAsync), so it applies whatever the key's current state is. secret_hash and
    // last_used_utc are deliberately not written — that is the difference from RotateAsync.
    command.CommandText = enabled
        ? """
          UPDATE api_keys
          SET revoked_utc = NULL
          WHERE key_id = $key_id;
          """
        : """
          UPDATE api_keys
          SET revoked_utc = $revoked_utc
          WHERE key_id = $key_id;
          """;
    command.Parameters.AddWithValue("$key_id", keyId);
    if (!enabled)
    {
        // Only the disable statement references $revoked_utc; it is stored in round-trip ("O") form.
        command.Parameters.AddWithValue("$revoked_utc", whenUtc.ToString("O"));
    }

    // Zero affected rows means no key with that id exists.
    var affected = await command.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
    return affected > 0;
}
/// <inheritdoc /> /// <inheritdoc />
public async Task<bool> DeleteAsync(string keyId, CancellationToken ct) public async Task<bool> DeleteAsync(string keyId, CancellationToken ct)
{ {
@@ -5,8 +5,15 @@ namespace ZB.MOM.WW.Auth.ApiKeys.Sqlite;
/// </summary> /// </summary>
public static class SqliteAuthSchema public static class SqliteAuthSchema
{ {
/// <summary>The schema version this build creates and supports.</summary> /// <summary>
public const int CurrentVersion = 1; /// The schema version this build creates and supports. This is <c>2</c>, not <c>1</c>,
/// to match the deployed databases of the donor (MxAccessGateway) this store was
/// extracted from: that store reached its final shape via a v1→v2 history and stamps
/// <c>version = 2</c> on disk. The final schema has been byte-identical since v1, so a
/// single-shot create stamped as 2 interoperates with existing <c>gateway-auth.db</c>
/// files (the migrator only refuses an on-disk version <em>newer</em> than this).
/// </summary>
public const int CurrentVersion = 2;
/// <summary>Name of the single-row table tracking the applied schema version.</summary> /// <summary>Name of the single-row table tracking the applied schema version.</summary>
public const string SchemaVersionTable = "schema_version"; public const string SchemaVersionTable = "schema_version";
@@ -35,7 +35,7 @@ public sealed class SqliteAuthStoreMigrator(AuthSqliteConnectionFactory connecti
$"Auth database schema version {existingVersion} is newer than supported version {SqliteAuthSchema.CurrentVersion}."); $"Auth database schema version {existingVersion} is newer than supported version {SqliteAuthSchema.CurrentVersion}.");
} }
await ApplyVersionOneAsync(connection, transaction, cancellationToken).ConfigureAwait(false); await ApplySchemaAsync(connection, transaction, cancellationToken).ConfigureAwait(false);
await WriteSchemaVersionAsync(connection, transaction, cancellationToken).ConfigureAwait(false); await WriteSchemaVersionAsync(connection, transaction, cancellationToken).ConfigureAwait(false);
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false); await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
@@ -78,7 +78,10 @@ public sealed class SqliteAuthStoreMigrator(AuthSqliteConnectionFactory connecti
: Convert.ToInt32(version, CultureInfo.InvariantCulture); : Convert.ToInt32(version, CultureInfo.InvariantCulture);
} }
private static async Task ApplyVersionOneAsync( // Single-shot create of the final schema (all DDL is CREATE ... IF NOT EXISTS, so it is
// idempotent against an already-provisioned database). The applied version is stamped
// separately by WriteSchemaVersionAsync.
private static async Task ApplySchemaAsync(
SqliteConnection connection, SqliteConnection connection,
SqliteTransaction transaction, SqliteTransaction transaction,
CancellationToken cancellationToken) CancellationToken cancellationToken)
@@ -37,7 +37,14 @@ public static class ServiceCollectionExtensions
ArgumentNullException.ThrowIfNull(config); ArgumentNullException.ThrowIfNull(config);
ArgumentException.ThrowIfNullOrWhiteSpace(sectionPath); ArgumentException.ThrowIfNullOrWhiteSpace(sectionPath);
services.Configure<LdapOptions>(config.GetSection(sectionPath)); // Bind via the options builder and opt into start-time validation. An IValidateOptions<T>
// otherwise only runs when the options are first materialized (IOptions<T>.Value) — which
// here is the first login (ILdapAuthService factory below), not boot. ValidateOnStart hooks
// the host's start-time options validation so a misconfigured directory (e.g. insecure
// transport without AllowInsecure) fails fast at startup rather than on first login.
services.AddOptions<LdapOptions>()
.Bind(config.GetSection(sectionPath))
.ValidateOnStart();
// Fail fast at startup on a misconfigured directory rather than on first login. // Fail fast at startup on a misconfigured directory rather than on first login.
services.AddSingleton<IValidateOptions<LdapOptions>, LdapOptionsValidator>(); services.AddSingleton<IValidateOptions<LdapOptions>, LdapOptionsValidator>();
@@ -1,5 +1,6 @@
namespace ZB.MOM.WW.Auth.Ldap.Internal; namespace ZB.MOM.WW.Auth.Ldap.Internal;
using System.Net.Security;
using ZB.MOM.WW.Auth.Abstractions.Ldap; using ZB.MOM.WW.Auth.Abstractions.Ldap;
/// <summary> /// <summary>
@@ -15,8 +16,29 @@ internal sealed record LdapSearchEntry(
/// </summary> /// </summary>
internal interface ILdapConnection : IDisposable internal interface ILdapConnection : IDisposable
{ {
/// <summary>Opens (and optionally upgrades to TLS) a connection to the given host.</summary> /// <summary>
void Connect(string host, int port, LdapTransport transport, bool allowInsecure, int timeoutMs); /// Opens (and optionally upgrades to TLS) a connection to the given host.
/// </summary>
/// <param name="host">The LDAP server hostname or IP.</param>
/// <param name="port">The LDAP server port.</param>
/// <param name="transport">The transport security mode.</param>
/// <param name="allowInsecure">
/// When <see langword="true"/> AND no <paramref name="serverCertificateValidationCallback"/> is
/// supplied, TLS server-certificate validation is bypassed (dev/test only). Ignored when a
/// validation callback is supplied (the callback wins) or for plaintext transport.
/// </param>
/// <param name="timeoutMs">The connection/operation timeout in milliseconds.</param>
/// <param name="serverCertificateValidationCallback">
/// Optional TLS server-certificate validation callback. When <see langword="null"/>, the OS trust
/// store is used (the client does not blind-accept).
/// </param>
void Connect(
string host,
int port,
LdapTransport transport,
bool allowInsecure,
int timeoutMs,
RemoteCertificateValidationCallback? serverCertificateValidationCallback);
/// <summary>Binds with the supplied DN and password. Throws <c>LdapException</c> on bad credentials.</summary> /// <summary>Binds with the supplied DN and password. Throws <c>LdapException</c> on bad credentials.</summary>
void Bind(string dn, string password); void Bind(string dn, string password);
@@ -2,19 +2,67 @@ namespace ZB.MOM.WW.Auth.Ldap.Internal;
using Novell.Directory.Ldap; using Novell.Directory.Ldap;
using ZB.MOM.WW.Auth.Abstractions.Ldap; using ZB.MOM.WW.Auth.Abstractions.Ldap;
// Disambiguate: Novell also declares a RemoteCertificateValidationCallback delegate; the seam and
// LdapConnectionOptions.ConfigureRemoteCertificateValidationCallback both use the BCL one.
using RemoteCertificateValidationCallback = System.Net.Security.RemoteCertificateValidationCallback;
/// <summary> /// <summary>
/// Production <see cref="ILdapConnection"/> backed by <c>Novell.Directory.Ldap.LdapConnection</c>. /// Production <see cref="ILdapConnection"/> backed by <c>Novell.Directory.Ldap.LdapConnection</c>.
/// Mirrors the connection/search idioms from ZB.MOM.WW.ScadaBridge.Security.LdapAuthService. /// Mirrors the connection/search idioms from ZB.MOM.WW.ScadaBridge.Security.LdapAuthService.
/// </summary> /// </summary>
/// <remarks>
/// TLS server-certificate validation: by default the underlying
/// <c>Novell.Directory.Ldap.NETStandard</c> client validates the server certificate against the OS
/// trust store (it does NOT blind-accept). A caller-supplied
/// <c>RemoteCertificateValidationCallback</c> overrides that default (CA pinning / SAN checks); when
/// none is supplied and <c>allowInsecure</c> is set, validation is bypassed for dev/test only.
/// </remarks>
internal sealed class NovellLdapConnection : ILdapConnection internal sealed class NovellLdapConnection : ILdapConnection
{ {
private readonly LdapConnection _conn = new(); private readonly LdapConnection _conn;
private bool _disposed; private bool _disposed;
/// <inheritdoc/> /// <summary>
public void Connect(string host, int port, LdapTransport transport, bool allowInsecure, int timeoutMs) /// Builds the connection, wiring a TLS server-certificate validation policy: a supplied
/// <paramref name="serverCertificateValidationCallback"/> wins; otherwise <paramref name="allowInsecure"/>
/// bypasses validation (dev/test only); otherwise the OS-trust-store default applies.
/// </summary>
public NovellLdapConnection(
bool allowInsecure = false,
RemoteCertificateValidationCallback? serverCertificateValidationCallback = null)
{ {
if (serverCertificateValidationCallback is not null)
{
var options = new LdapConnectionOptions()
.ConfigureRemoteCertificateValidationCallback(serverCertificateValidationCallback);
_conn = new LdapConnection(options);
}
else if (allowInsecure)
{
// Dev/test only: accept any server certificate. Reachable solely when an operator has set
// AllowInsecure (rejected for plaintext-without-AllowInsecure by LdapOptionsValidator).
var options = new LdapConnectionOptions()
.ConfigureRemoteCertificateValidationCallback((_, _, _, _) => true);
_conn = new LdapConnection(options);
}
else
{
// Default: validate against the OS trust store (no blind-accept).
_conn = new LdapConnection();
}
}
/// <inheritdoc/>
public void Connect(
string host,
int port,
LdapTransport transport,
bool allowInsecure,
int timeoutMs,
RemoteCertificateValidationCallback? serverCertificateValidationCallback)
{
// The TLS-validation policy (allowInsecure / callback) is wired at construction time on the
// LdapConnectionOptions; the per-call arguments here are accepted for seam symmetry.
ApplyTimeout(timeoutMs); ApplyTimeout(timeoutMs);
// LDAPS: TLS is negotiated at the TCP-connection level. // LDAPS: TLS is negotiated at the TCP-connection level.
@@ -98,8 +146,16 @@ internal sealed class NovellLdapConnection : ILdapConnection
} }
} }
/// <summary>Factory that produces fresh <see cref="NovellLdapConnection"/> instances.</summary> /// <summary>
internal sealed class NovellLdapConnectionFactory : ILdapConnectionFactory /// Factory that produces fresh <see cref="NovellLdapConnection"/> instances, carrying the TLS
/// server-certificate validation policy (a supplied callback, or an <c>allowInsecure</c> bypass) so
/// it is wired onto each connection at construction time.
/// </summary>
internal sealed class NovellLdapConnectionFactory(
bool allowInsecure = false,
RemoteCertificateValidationCallback? serverCertificateValidationCallback = null)
: ILdapConnectionFactory
{ {
public ILdapConnection Create() => new NovellLdapConnection(); public ILdapConnection Create() =>
new NovellLdapConnection(allowInsecure, serverCertificateValidationCallback);
} }
@@ -26,10 +26,14 @@ public sealed class LdapAuthService : ILdapAuthService
/// <summary> /// <summary>
/// Production constructor: binds against a live directory via the real /// Production constructor: binds against a live directory via the real
/// Novell-backed connection factory. /// Novell-backed connection factory. The TLS server-certificate validation policy
/// (<see cref="LdapOptions.ServerCertificateValidationCallback"/> or the
/// <see cref="LdapOptions.AllowInsecure"/> bypass) is carried into the factory so each
/// connection is built with it.
/// </summary> /// </summary>
public LdapAuthService(LdapOptions options) public LdapAuthService(LdapOptions options)
: this(options, new NovellLdapConnectionFactory()) : this(options, new NovellLdapConnectionFactory(
options.AllowInsecure, options.ServerCertificateValidationCallback))
{ {
} }
@@ -92,7 +96,13 @@ public sealed class LdapAuthService : ILdapAuthService
// Abstractions change could add DirectoryUnavailable to disambiguate. // Abstractions change could add DirectoryUnavailable to disambiguate.
try try
{ {
conn.Connect(_options.Server, _options.Port, _options.Transport, _options.AllowInsecure, _options.ConnectionTimeoutMs); conn.Connect(
_options.Server,
_options.Port,
_options.Transport,
_options.AllowInsecure,
_options.ConnectionTimeoutMs,
_options.ServerCertificateValidationCallback);
} }
catch (LdapException) catch (LdapException)
{ {
@@ -9,7 +9,9 @@ namespace ZB.MOM.WW.Auth.Ldap;
/// low-level error on the first real login attempt. /// low-level error on the first real login attempt.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// Four conditions are enforced: /// Validation is skipped entirely when <see cref="LdapOptions.Enabled"/> is <c>false</c>
/// (a disabled provider's connection fields are inert). When enabled, four conditions
/// are enforced:
/// <list type="bullet"> /// <list type="bullet">
/// <item>plaintext transport (<see cref="LdapTransport.None"/>) is rejected unless /// <item>plaintext transport (<see cref="LdapTransport.None"/>) is rejected unless
/// <see cref="LdapOptions.AllowInsecure"/> is explicitly set (dev/test only);</item> /// <see cref="LdapOptions.AllowInsecure"/> is explicitly set (dev/test only);</item>
@@ -27,6 +29,14 @@ public sealed class LdapOptionsValidator : IValidateOptions<LdapOptions>
{ {
ArgumentNullException.ThrowIfNull(options); ArgumentNullException.ThrowIfNull(options);
// When LDAP is disabled, its connection fields are inert — do not require them.
// A consumer that turns LDAP off should not have to supply a server/search-base/
// service-account just to satisfy startup validation.
if (!options.Enabled)
{
return ValidateOptionsResult.Success;
}
if (options.Transport == LdapTransport.None && !options.AllowInsecure) if (options.Transport == LdapTransport.None && !options.AllowInsecure)
{ {
return ValidateOptionsResult.Fail( return ValidateOptionsResult.Fail(
@@ -87,6 +87,33 @@ public sealed class ApiKeyAdminCommandsTests : IAsyncLifetime
Assert.Single(recent, e => e.EventType == "create-key"); Assert.Single(recent, e => e.EventType == "create-key");
} }
[Fact]
public async Task CreateKey_PersistsBareTokenPrefix_NotPrefixUnderscoreKeyId()
{
    // Auth-005 regression guard: the stored KeyPrefix is the bare token prefix ("mxgw") and
    // never the prefix with the key id appended ("mxgw_key-1"). The key id already has its
    // own column, so embedding it made the value disagree with the read/test paths.
    var readOnlyScope = new HashSet<string>(["read"], StringComparer.Ordinal);
    ApiKeyAdminCommands admin = BuildCommands();
    await admin.InitDbAsync(null, CancellationToken.None);
    await admin.CreateKeyAsync(
        "key-1",
        "Service A",
        readOnlyScope,
        constraintsJson: null,
        remoteAddress: null,
        CancellationToken.None);

    // Read path: the record looked up by id carries the bare prefix.
    ApiKeyRecord? stored = await _read.FindByKeyIdAsync("key-1", CancellationToken.None);
    Assert.NotNull(stored);
    Assert.Equal("mxgw", stored!.KeyPrefix);

    // Admin list projection: the same bare prefix is reported there too.
    IReadOnlyList<ApiKeyListItem> keys = await admin.ListKeysAsync(CancellationToken.None);
    ApiKeyListItem entry = Assert.Single(keys, candidate => candidate.KeyId == "key-1");
    Assert.Equal("mxgw", entry.KeyPrefix);
}
[Fact] [Fact]
public async Task CreateKey_PepperUnavailable_ReturnsNoTokenAndAppendsNoAudit() public async Task CreateKey_PepperUnavailable_ReturnsNoTokenAndAppendsNoAudit()
{ {
@@ -265,6 +292,59 @@ public sealed class ApiKeyAdminCommandsTests : IAsyncLifetime
Assert.Equal(auditCountBefore, auditCountAfter); Assert.Equal(auditCountBefore, auditCountAfter);
} }
// --- set-scopes / enable-disable ---
[Fact]
public async Task SetEnabledAsync_And_SetScopesAsync_AppendAuditEntries()
{
    // Arrange: a freshly initialised store holding one key with a single "read" scope.
    ApiKeyAdminCommands admin = BuildCommands();
    await admin.InitDbAsync(null, CancellationToken.None);
    await admin.CreateKeyAsync(
        "key-1",
        "Service A",
        new HashSet<string>(["read"], StringComparer.Ordinal),
        null,
        null,
        CancellationToken.None);

    // Step 1: disabling succeeds and the key is no longer returned by the active lookup.
    KeyActionResult disableOutcome =
        await admin.SetEnabledAsync("key-1", enabled: false, "10.0.0.1", CancellationToken.None);
    Assert.True(disableOutcome.Succeeded);
    Assert.Null(await _read.FindActiveByKeyIdAsync("key-1", CancellationToken.None));

    // Step 2: re-enabling succeeds and the key is returned by the active lookup again.
    KeyActionResult enableOutcome =
        await admin.SetEnabledAsync("key-1", enabled: true, "10.0.0.1", CancellationToken.None);
    Assert.True(enableOutcome.Succeeded);
    Assert.NotNull(await _read.FindActiveByKeyIdAsync("key-1", CancellationToken.None));

    // Step 3: replace the scope set.
    KeyActionResult scopeOutcome = await admin.SetScopesAsync(
        "key-1",
        new HashSet<string>(["read", "write"], StringComparer.Ordinal),
        "10.0.0.1",
        CancellationToken.None);
    Assert.True(scopeOutcome.Succeeded);

    // Each of the three operations left exactly one audit entry of its own event type.
    IReadOnlyList<ApiKeyAuditEntry> auditTrail = await _audit.ListRecentAsync(50, CancellationToken.None);
    Assert.Single(auditTrail, entry => entry.EventType == "disable-key");
    Assert.Single(auditTrail, entry => entry.EventType == "enable-key");
    Assert.Single(auditTrail, entry => entry.EventType == "set-scopes");

    // The list projection reflects the replaced scope set.
    IReadOnlyList<ApiKeyListItem> keys = await admin.ListKeysAsync(CancellationToken.None);
    ApiKeyListItem listedKey = Assert.Single(keys, candidate => candidate.KeyId == "key-1");
    Assert.True(listedKey.Scopes.SetEquals(new HashSet<string>(["read", "write"], StringComparer.Ordinal)));
}
[Fact]
public async Task SetScopesAsync_NullScopes_Throws()
{
    // A null scope set is rejected with an ArgumentException (or a subclass of it).
    ApiKeyAdminCommands admin = BuildCommands();
    await admin.InitDbAsync(null, CancellationToken.None);

    Func<Task> setNullScopes = () =>
        admin.SetScopesAsync("key-1", null!, null, CancellationToken.None);

    await Assert.ThrowsAnyAsync<ArgumentException>(setNullScopes);
}
// --- delete-key --- // --- delete-key ---
[Fact] [Fact]
@@ -212,6 +212,51 @@ public class ApiKeyVerifierTests
Assert.DoesNotContain(Convert.ToBase64String(hash), identityText, StringComparison.Ordinal); Assert.DoesNotContain(Convert.ToBase64String(hash), identityText, StringComparison.Ordinal);
} }
// --- Auth-002: a failed best-effort MarkUsedAsync must NOT fail a valid key ---
[Fact]
public async Task VerifyAsync_ValidKey_MarkUsedThrows_StillSucceeds()
{
    // Auth-002: MarkUsedAsync is best-effort "last used" bookkeeping. The auth decision is made
    // before the usage write, so a transient storage failure (SQLITE_BUSY, disk full, locked DB)
    // must not fail an otherwise-valid credential. Only cancellation may escape the verifier;
    // any other MarkUsedAsync failure is swallowed and the result stays Succeeded == true.
    byte[] storedHash = ApiKeySecretHasher.Hash(Secret, Pepper);
    var failingStore = new FakeApiKeyStore
    {
        Record = BuildRecord(storedHash),
        MarkUsedException = new InvalidOperationException("SQLITE_BUSY"),
    };
    var sut = BuildVerifier(failingStore, new FakePepperProvider(Pepper));

    ApiKeyVerification verification =
        await sut.VerifyAsync(Header(KeyId, Secret), CancellationToken.None);

    Assert.True(verification.Succeeded);
    Assert.Null(verification.Failure);
    Assert.NotNull(verification.Identity);
    Assert.Equal(KeyId, verification.Identity!.KeyId);
    // The usage write was attempted; its failure simply did not affect the outcome.
    Assert.True(failingStore.MarkUsedCalled);
}
[Fact]
public async Task VerifyAsync_MarkUsedThrowsOperationCanceled_Propagates()
{
    // Cancellation is the one exception path the verifier allows: an OperationCanceledException
    // raised by the usage write (e.g. the request was cancelled mid-write) must be re-thrown
    // rather than swallowed like other bookkeeping failures.
    byte[] storedHash = ApiKeySecretHasher.Hash(Secret, Pepper);
    var cancellingStore = new FakeApiKeyStore
    {
        Record = BuildRecord(storedHash),
        MarkUsedException = new OperationCanceledException(),
    };
    var sut = BuildVerifier(cancellingStore, new FakePepperProvider(Pepper));

    Func<Task> verify = () => sut.VerifyAsync(Header(KeyId, Secret), CancellationToken.None);

    await Assert.ThrowsAnyAsync<OperationCanceledException>(verify);
}
// --- Cancellation --- // --- Cancellation ---
[Fact] [Fact]
@@ -253,6 +298,9 @@ public class ApiKeyVerifierTests
public string? MarkUsedKeyId { get; private set; } public string? MarkUsedKeyId { get; private set; }
public DateTimeOffset? MarkUsedWhenUtc { get; private set; } public DateTimeOffset? MarkUsedWhenUtc { get; private set; }
/// <summary>When set, <see cref="MarkUsedAsync"/> throws this exception (after recording the call).</summary>
public Exception? MarkUsedException { get; set; }
public Task<ApiKeyRecord?> FindByKeyIdAsync(string keyId, CancellationToken ct) public Task<ApiKeyRecord?> FindByKeyIdAsync(string keyId, CancellationToken ct)
{ {
FindByKeyIdCalled = true; FindByKeyIdCalled = true;
@@ -267,6 +315,11 @@ public class ApiKeyVerifierTests
MarkUsedCalled = true; MarkUsedCalled = true;
MarkUsedKeyId = keyId; MarkUsedKeyId = keyId;
MarkUsedWhenUtc = whenUtc; MarkUsedWhenUtc = whenUtc;
if (MarkUsedException is not null)
{
return Task.FromException(MarkUsedException);
}
return Task.CompletedTask; return Task.CompletedTask;
} }
} }
@@ -105,6 +105,87 @@ public sealed class SqliteApiKeyAdminStoreTests : IAsyncLifetime
Assert.False(result); Assert.False(result);
} }
// --- SetScopes ---
[Fact]
public async Task SetScopesAsync_ReplacesScopes_AndReturnsTrue()
{
    // Seed a key whose only scope is "a".
    ApiKeyRecord seeded = SampleRecord("key-1") with
    {
        Scopes = new HashSet<string>(["a"], StringComparer.Ordinal),
    };
    await _admin.CreateAsync(seeded, CancellationToken.None);

    bool replaced = await _admin.SetScopesAsync(
        "key-1",
        new HashSet<string>(["b", "c"], StringComparer.Ordinal),
        CancellationToken.None);

    Assert.True(replaced);

    // The listing shows exactly the new scopes: "a" is gone, "b" and "c" are present.
    IReadOnlyList<ApiKeyListItem> keys = await _admin.ListAsync(CancellationToken.None);
    ApiKeyListItem listedKey = Assert.Single(keys, candidate => candidate.KeyId == "key-1");
    Assert.True(listedKey.Scopes.SetEquals(new HashSet<string>(["b", "c"], StringComparer.Ordinal)));
}
[Fact]
public async Task SetScopesAsync_UnknownKey_ReturnsFalse()
{
    // No key with id "missing" was created, so the update reports false.
    var anyScopes = new HashSet<string>(["b"], StringComparer.Ordinal);

    bool updated = await _admin.SetScopesAsync("missing", anyScopes, CancellationToken.None);

    Assert.False(updated);
}
// --- SetEnabled ---
[Fact]
public async Task SetEnabledAsync_False_DisablesKey()
{
    // Arrange: one active key and a fixed timestamp for the disable operation.
    await _admin.CreateAsync(SampleRecord("key-1"), CancellationToken.None);
    var when = new DateTimeOffset(2026, 5, 31, 9, 0, 0, TimeSpan.Zero);

    bool result = await _admin.SetEnabledAsync("key-1", enabled: false, when, CancellationToken.None);

    Assert.True(result);
    // The key is no longer returned by the active-only lookup...
    Assert.Null(await _read.FindActiveByKeyIdAsync("key-1", CancellationToken.None));

    // ...but the row still exists and records the supplied timestamp as its revocation time.
    ApiKeyRecord? found = await _read.FindByKeyIdAsync("key-1", CancellationToken.None);
    // Assert non-null before dereferencing so a missing row fails as a clear assertion
    // instead of a NullReferenceException (matches the sibling tests in this class).
    Assert.NotNull(found);
    Assert.Equal(when, found!.RevokedUtc);
}
[Fact]
public async Task SetEnabledAsync_True_ReenablesKey_WithoutChangingSecret()
{
    var lastUsedAt = new DateTimeOffset(2026, 5, 20, 12, 0, 0, TimeSpan.Zero);
    var disabledAt = new DateTimeOffset(2026, 5, 31, 9, 0, 0, TimeSpan.Zero);
    var reenabledAt = new DateTimeOffset(2026, 6, 1, 9, 0, 0, TimeSpan.Zero);

    ApiKeyRecord seeded = SampleRecord("key-1");
    await _admin.CreateAsync(seeded, CancellationToken.None);

    // Stamp a usage time first so the test can prove re-enabling leaves it alone.
    await _read.MarkUsedAsync("key-1", lastUsedAt, CancellationToken.None);

    // Turn the key off and then back on.
    await _admin.SetEnabledAsync("key-1", enabled: false, disabledAt, CancellationToken.None);
    bool reenabled = await _admin.SetEnabledAsync("key-1", enabled: true, reenabledAt, CancellationToken.None);

    Assert.True(reenabled);

    // The key is active again; its secret hash and last-used timestamp are as they were.
    ApiKeyRecord? current = await _read.FindActiveByKeyIdAsync("key-1", CancellationToken.None);
    Assert.NotNull(current);
    Assert.True(current!.SecretHash.SequenceEqual(seeded.SecretHash));
    Assert.Null(current.RevokedUtc);
    Assert.Equal(lastUsedAt, current.LastUsedUtc);
}
[Fact]
public async Task SetEnabledAsync_UnknownKey_ReturnsFalse()
{
    // No key with id "missing" was created, so the toggle reports false.
    bool toggled = await _admin.SetEnabledAsync(
        "missing",
        enabled: false,
        DateTimeOffset.UtcNow,
        CancellationToken.None);

    Assert.False(toggled);
}
// --- Delete --- // --- Delete ---
[Fact] [Fact]
@@ -172,6 +253,73 @@ public sealed class SqliteApiKeyAdminStoreTests : IAsyncLifetime
() => _admin.DeleteAsync(keyId!, CancellationToken.None)); () => _admin.DeleteAsync(keyId!, CancellationToken.None));
} }
[Theory]
[InlineData(null)]
[InlineData("")]
[InlineData(" ")]
public async Task SetScopesAsync_NullOrWhitespaceKeyId_ThrowsArgumentException(string? keyId)
{
    // A null, empty, or whitespace key id is rejected with an ArgumentException (or subclass).
    var validScopes = new HashSet<string>(["read"], StringComparer.Ordinal);

    Func<Task> setScopes = () => _admin.SetScopesAsync(keyId!, validScopes, CancellationToken.None);

    await Assert.ThrowsAnyAsync<ArgumentException>(setScopes);
}
[Theory]
[InlineData(null)]
[InlineData("")]
[InlineData(" ")]
public async Task SetEnabledAsync_NullOrWhitespaceKeyId_ThrowsArgumentException(string? keyId)
{
    // A null, empty, or whitespace key id is rejected with an ArgumentException (or subclass).
    Func<Task> disable = () =>
        _admin.SetEnabledAsync(keyId!, enabled: false, DateTimeOffset.UtcNow, CancellationToken.None);

    await Assert.ThrowsAnyAsync<ArgumentException>(disable);
}
[Fact]
public async Task SetScopesAsync_NullScopes_ThrowsArgumentNullException()
{
    // The key exists, so the only invalid input is the null scope set; the store must
    // reject it with exactly ArgumentNullException.
    await _admin.CreateAsync(SampleRecord("key-1"), CancellationToken.None);

    Func<Task> setNullScopes = () => _admin.SetScopesAsync("key-1", null!, CancellationToken.None);

    await Assert.ThrowsAsync<ArgumentNullException>(setNullScopes);
}
// --- SetEnabled idempotence ---
[Fact]
public async Task SetEnabledAsync_OnAlreadyActiveKey_ReturnsTrue()
{
    // Enabling a key that was never disabled still reports success and leaves it active.
    await _admin.CreateAsync(SampleRecord("key-1"), CancellationToken.None);

    bool enabledAgain = await _admin.SetEnabledAsync(
        "key-1",
        enabled: true,
        DateTimeOffset.UtcNow,
        CancellationToken.None);

    Assert.True(enabledAgain);
    ApiKeyRecord? current = await _read.FindActiveByKeyIdAsync("key-1", CancellationToken.None);
    Assert.NotNull(current);
    Assert.Null(current!.RevokedUtc);
}
[Fact]
public async Task SetEnabledAsync_OnAlreadyDisabledKey_OverwritesTimestamp_ReturnsTrue()
{
    var firstDisable = new DateTimeOffset(2026, 5, 1, 10, 0, 0, TimeSpan.Zero);
    var secondDisable = new DateTimeOffset(2026, 5, 15, 10, 0, 0, TimeSpan.Zero);
    await _admin.CreateAsync(SampleRecord("key-1"), CancellationToken.None);

    // First disable stamps the earlier time.
    await _admin.SetEnabledAsync("key-1", enabled: false, firstDisable, CancellationToken.None);

    // Disabling again later is an idempotent overwrite (there is no guard on revoked_utc).
    bool disabledAgain =
        await _admin.SetEnabledAsync("key-1", enabled: false, secondDisable, CancellationToken.None);

    Assert.True(disabledAgain);

    // The listing reports the later timestamp, proving the earlier one was replaced.
    IReadOnlyList<ApiKeyListItem> keys = await _admin.ListAsync(CancellationToken.None);
    ApiKeyListItem listedKey = Assert.Single(keys, candidate => candidate.KeyId == "key-1");
    Assert.Equal(secondDisable, listedKey.RevokedUtc);
}
// --- Audit --- // --- Audit ---
[Fact] [Fact]
@@ -164,6 +164,36 @@ public sealed class SqliteApiKeyStoreTests : IAsyncLifetime
Assert.Empty(ScopeSerializer.Deserialize("")); Assert.Empty(ScopeSerializer.Deserialize(""));
} }
// --- Auth-003: corrupt scopes JSON must fail closed (empty set), never throw JsonException ---
[Theory]
[InlineData("not json at all")]
[InlineData("{")]
[InlineData("{\"a\":1}")] // valid JSON, but an object, not a string[]
[InlineData("42")] // valid JSON, but a number
[InlineData("[\"read\",")] // truncated/partial write
public void ScopeSerializer_DeserializeMalformed_ReturnsEmptySet_DoesNotThrow(string value)
{
    // Fail closed: a poisoned scopes column (tampering, partial write, format change, buggy
    // writer) must yield a zero-scope set instead of throwing on the verification hot path.
    IReadOnlySet<string> parsed = ScopeSerializer.Deserialize(value);

    Assert.Empty(parsed);
}
[Fact]
public async Task FindByKeyId_CorruptScopesColumn_ReturnsRecordWithEmptyScopes_DoesNotThrow()
{
    // Persist a row whose scopes column is malformed (non-array) JSON and read it back via the
    // store. FindByKeyIdAsync must not leak a JsonException: the verifier depends on that for
    // its "only exception path is cancellation" contract.
    const string corruptKeyId = "key-corrupt";
    await InsertWithRawScopesAsync(corruptKeyId, scopesJson: "{ this is not valid json");

    ApiKeyRecord? loaded = await _store.FindByKeyIdAsync(corruptKeyId, CancellationToken.None);

    Assert.NotNull(loaded);
    Assert.Empty(loaded!.Scopes);
}
private static ApiKeyRecord SampleRecord(string keyId) => new( private static ApiKeyRecord SampleRecord(string keyId) => new(
KeyId: keyId, KeyId: keyId,
KeyPrefix: "mxgw_ab12", KeyPrefix: "mxgw_ab12",
@@ -213,6 +243,33 @@ public sealed class SqliteApiKeyStoreTests : IAsyncLifetime
await command.ExecuteNonQueryAsync(CancellationToken.None); await command.ExecuteNonQueryAsync(CancellationToken.None);
} }
/// <summary>
/// Test helper: inserts one row into <c>api_keys</c> with the given key id and the
/// <paramref name="scopesJson"/> text stored verbatim in the <c>scopes</c> column.
/// </summary>
/// <param name="keyId">Value written to the <c>key_id</c> column.</param>
/// <param name="scopesJson">Raw text written to the <c>scopes</c> column; it is not validated or re-serialized here.</param>
private async Task InsertWithRawScopesAsync(string keyId, string scopesJson)
{
// Writes the scopes column verbatim (NOT via ScopeSerializer.Serialize) so a malformed
// value can be persisted to simulate tampering / a partial or buggy write.
await using SqliteConnection connection =
await _factory.OpenConnectionAsync(CancellationToken.None);
await using SqliteCommand command = connection.CreateCommand();
// Every column is bound through a named parameter; nothing is concatenated into the SQL text.
command.CommandText = """
INSERT INTO api_keys (
key_id, key_prefix, secret_hash, display_name, scopes,
constraints, created_utc, last_used_utc, revoked_utc)
VALUES (
$key_id, $key_prefix, $secret_hash, $display_name, $scopes,
$constraints, $created_utc, $last_used_utc, $revoked_utc);
""";
// Only key_id and scopes come from the caller; the remaining columns get fixed filler values.
command.Parameters.AddWithValue("$key_id", keyId);
command.Parameters.AddWithValue("$key_prefix", "mxgw");
// The hash is bound explicitly as a BLOB holding three placeholder bytes.
command.Parameters.Add("$secret_hash", SqliteType.Blob).Value = new byte[] { 1, 2, 3 };
command.Parameters.AddWithValue("$display_name", "Corrupt Key");
command.Parameters.AddWithValue("$scopes", scopesJson);
// constraints, last_used_utc and revoked_utc are stored as SQL NULL.
command.Parameters.AddWithValue("$constraints", DBNull.Value);
// created_utc is the Unix epoch formatted with the round-trip ("O") specifier.
command.Parameters.AddWithValue("$created_utc", DateTimeOffset.UnixEpoch.ToString("O"));
command.Parameters.AddWithValue("$last_used_utc", DBNull.Value);
command.Parameters.AddWithValue("$revoked_utc", DBNull.Value);
await command.ExecuteNonQueryAsync(CancellationToken.None);
}
public Task DisposeAsync() public Task DisposeAsync()
{ {
SqliteConnection.ClearAllPools(); SqliteConnection.ClearAllPools();
@@ -34,6 +34,27 @@ public sealed class SqliteMigratorTests : IDisposable
Assert.Equal(1, await CountSchemaVersionRowsAsync()); Assert.Equal(1, await CountSchemaVersionRowsAsync());
} }
[Fact]
public void CurrentVersion_Is2_ToMatchDonorGatewayDeployedSchema()
{
    // Invariant lock: this store was extracted from MxAccessGateway, whose deployed
    // gateway-auth.db is stamped version 2. The library has to stamp 2 as well (not reset
    // to 1), otherwise it would refuse those existing databases on first boot.
    const int donorGatewaySchemaVersion = 2;

    Assert.Equal(donorGatewaySchemaVersion, SqliteAuthSchema.CurrentVersion);
}
[Fact]
public async Task MigrateAsync_AgainstExistingVersion2Db_DoesNotThrow_AndStaysAt2()
{
    // Deployed-gateway scenario: the database has already been provisioned at version 2.
    var sut = new SqliteAuthStoreMigrator(Factory);
    await sut.MigrateAsync(CancellationToken.None);
    await SetVersionAsync(2);

    // Migrating again over the version-2 database must complete without throwing.
    await sut.MigrateAsync(CancellationToken.None);

    int versionAfter = await ReadVersionAsync();
    bool apiKeysTablePresent = await TableExistsAsync(SqliteAuthSchema.ApiKeysTable);
    Assert.Equal(2, versionAfter);
    Assert.True(apiKeysTablePresent);
}
[Fact] [Fact]
public async Task MigrateAsync_FutureSchemaVersion_Throws() public async Task MigrateAsync_FutureSchemaVersion_Throws()
{ {
@@ -1,5 +1,6 @@
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using ZB.MOM.WW.Auth.Abstractions.Ldap; using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.AspNetCore; using ZB.MOM.WW.Auth.AspNetCore;
@@ -85,4 +86,52 @@ public class ServiceCollectionExtensionsTests
Assert.Contains(validators, v => v is LdapOptionsValidator); Assert.Contains(validators, v => v is LdapOptionsValidator);
} }
// --- Auth-001: ValidateOnStart must run options validation at host startup, not first login ---
private static IConfiguration BuildInsecureConfiguration()
{
    // An LDAP section that is complete except for its transport policy:
    // plaintext transport without AllowInsecure, which the validator must reject.
    var settings = new Dictionary<string, string?>
    {
        [$"{LdapSection}:Server"] = LdapServer,
        [$"{LdapSection}:SearchBase"] = "dc=example,dc=com",
        [$"{LdapSection}:ServiceAccountDn"] = "cn=svc,dc=example,dc=com",
        [$"{LdapSection}:Transport"] = nameof(LdapTransport.None),
        [$"{LdapSection}:AllowInsecure"] = "false",
    };

    var builder = new ConfigurationBuilder();
    builder.AddInMemoryCollection(settings);
    return builder.Build();
}
[Fact]
public async Task AddZbLdapAuth_StartingHost_FailsForInsecureConfig()
{
    // Auth-001: a misconfiguration has to surface when the host starts, not be deferred to
    // the first login (the first ILdapAuthService resolution). ValidateOnStart hooks options
    // validation into host start-up, so StartAsync must throw OptionsValidationException.
    IConfiguration insecure = BuildInsecureConfiguration();
    using IHost host = new HostBuilder()
        .ConfigureServices(services =>
        {
            services.AddZbLdapAuth(insecure, LdapSection);
        })
        .Build();

    OptionsValidationException failure =
        await Assert.ThrowsAsync<OptionsValidationException>(() => host.StartAsync());

    // The reported failures mention the offending Transport option.
    string failureText = string.Join(" ", failure.Failures);
    Assert.Contains(nameof(LdapOptions.Transport), failureText);
}
[Fact]
public async Task AddZbLdapAuth_StartingHost_SucceedsForSecureConfig()
{
    // Counterpart to the insecure case: a valid (secure) configuration must start cleanly,
    // showing that ValidateOnStart does not reject well-formed options.
    IConfiguration secure = BuildConfiguration();
    using IHost host = new HostBuilder()
        .ConfigureServices(services =>
        {
            services.AddZbLdapAuth(secure, LdapSection);
        })
        .Build();

    // Completing start and stop without an exception is the assertion.
    await host.StartAsync();
    await host.StopAsync();
}
} }
@@ -1,3 +1,4 @@
using System.Net.Security;
using ZB.MOM.WW.Auth.Abstractions.Ldap; using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.Ldap.Internal; using ZB.MOM.WW.Auth.Ldap.Internal;
@@ -19,6 +20,10 @@ internal sealed class FakeLdapConnection : ILdapConnection
// ---- observation ----- // ---- observation -----
public (string Host, int Port, LdapTransport Transport, bool AllowInsecure, int TimeoutMs)? ConnectArgs { get; private set; } public (string Host, int Port, LdapTransport Transport, bool AllowInsecure, int TimeoutMs)? ConnectArgs { get; private set; }
/// <summary>The server-certificate validation callback passed to the most recent <see cref="Connect"/> call.</summary>
public RemoteCertificateValidationCallback? ConnectCertCallback { get; private set; }
public List<string> BoundDns { get; } = new(); public List<string> BoundDns { get; } = new();
/// <summary> /// <summary>
@@ -107,9 +112,16 @@ internal sealed class FakeLdapConnection : ILdapConnection
// ---- ILdapConnection ----- // ---- ILdapConnection -----
public void Connect(string host, int port, LdapTransport transport, bool allowInsecure, int timeoutMs) public void Connect(
string host,
int port,
LdapTransport transport,
bool allowInsecure,
int timeoutMs,
RemoteCertificateValidationCallback? serverCertificateValidationCallback = null)
{ {
ConnectArgs = (host, port, transport, allowInsecure, timeoutMs); ConnectArgs = (host, port, transport, allowInsecure, timeoutMs);
ConnectCertCallback = serverCertificateValidationCallback;
if (_throwOnConnect) if (_throwOnConnect)
throw new Novell.Directory.Ldap.LdapException( throw new Novell.Directory.Ldap.LdapException(
"Directory unreachable", Novell.Directory.Ldap.LdapException.ConnectError, host); "Directory unreachable", Novell.Directory.Ldap.LdapException.ConnectError, host);
@@ -1,3 +1,4 @@
using System.Net.Security;
using ZB.MOM.WW.Auth.Abstractions.Ldap; using ZB.MOM.WW.Auth.Abstractions.Ldap;
using ZB.MOM.WW.Auth.Ldap; using ZB.MOM.WW.Auth.Ldap;
@@ -80,6 +81,56 @@ public class LdapAuthServiceTests
Assert.Equal(LdapAuthFailure.Disabled, (await svc.AuthenticateAsync("a", "b", default)).Failure); Assert.Equal(LdapAuthFailure.Disabled, (await svc.AuthenticateAsync("a", "b", default)).Failure);
} }
// --- Auth-006: TLS validation seam — allowInsecure is honoured and a cert-validation
// callback is threaded into the connection rather than being silently ignored. ---
[Fact]
public async Task Connect_ReceivesAllowInsecureFlag_FromOptions()
{
    // Auth-006: the allowInsecure option must actually reach the connection
    // (it used to be an unused parameter).
    var connection = new FakeLdapConnection().WithUserEntry(
        "cn=alice,dc=x", memberOf: new[] { "cn=Engineers,ou=g,dc=x" });
    var insecureOptions = Opts() with { AllowInsecure = true };
    var sut = new LdapAuthService(insecureOptions, new FakeLdapConnectionFactory(connection));

    await sut.AuthenticateAsync("alice", "pw", default);

    // The fake recorded the Connect arguments; the flag must have arrived as true.
    Assert.NotNull(connection.ConnectArgs);
    Assert.True(connection.ConnectArgs!.Value.AllowInsecure);
}
[Fact]
public async Task Connect_ReceivesConfiguredCertValidationCallback()
{
    // Auth-006: a consumer-supplied RemoteCertificateValidationCallback has to be handed to
    // the connection so production callers can pin a CA / validate the SAN; the seam must
    // no longer discard it.
    RemoteCertificateValidationCallback pinned = (_, _, _, _) => true;
    var connection = new FakeLdapConnection().WithUserEntry(
        "cn=alice,dc=x", memberOf: new[] { "cn=Engineers,ou=g,dc=x" });
    var optionsWithCallback = Opts() with { ServerCertificateValidationCallback = pinned };
    var sut = new LdapAuthService(optionsWithCallback, new FakeLdapConnectionFactory(connection));

    await sut.AuthenticateAsync("alice", "pw", default);

    // Reference equality: the very same delegate instance reached Connect.
    Assert.Same(pinned, connection.ConnectCertCallback);
}
[Fact]
public async Task Connect_NoCertCallbackConfigured_PassesNull()
{
    // Default case: with no callback configured, null is what reaches the connection. The
    // production adapter then falls back to OS-trust-store validation (documented behaviour).
    var connection = new FakeLdapConnection().WithUserEntry(
        "cn=alice,dc=x", memberOf: new[] { "cn=Engineers,ou=g,dc=x" });
    var factory = new FakeLdapConnectionFactory(connection);
    var sut = new LdapAuthService(Opts(), factory);

    await sut.AuthenticateAsync("alice", "pw", default);

    Assert.Null(connection.ConnectCertCallback);
}
[Fact] [Fact]
public async Task PreservesEscapedCommaInGroupName_OnRfc4514Dn() public async Task PreservesEscapedCommaInGroupName_OnRfc4514Dn()
{ {
@@ -72,4 +72,20 @@ public class LdapOptionsValidatorTests
Assert.False(new LdapOptionsValidator() Assert.False(new LdapOptionsValidator()
.Validate(null, Opts()) .Validate(null, Opts())
.Failed); .Failed);
[Fact]
public void Validator_Skips_AllChecks_WhenDisabled()
{
    // With LDAP disabled its connection fields are inert, so a config that would otherwise
    // be invalid (plaintext transport plus blank Server/SearchBase/ServiceAccountDn) must pass.
    var disabledOptions = new LdapOptions
    {
        Enabled = false,
        Transport = LdapTransport.None,
        AllowInsecure = false,
        Server = "",
        SearchBase = "",
        ServiceAccountDn = "",
    };

    var outcome = new LdapOptionsValidator().Validate(null, disabledOptions);

    Assert.False(outcome.Failed);
}
} }
+2 -2
View File
@@ -4,7 +4,7 @@ Startup configuration-validation library for the **ZB.MOM.WW SCADA family** (OtO
The library normalizes the three-project configuration-validation surface: a failure-accumulating `IValidateOptions` base, reusable rule primitives, a bind+validate+`ValidateOnStart` DI extension, and a pre-host `ConfigPreflight` aggregator for raw `IConfiguration` — so the plumbing is written once and domain rules stay per-project. The library normalizes the three-project configuration-validation surface: a failure-accumulating `IValidateOptions` base, reusable rule primitives, a bind+validate+`ValidateOnStart` DI extension, and a pre-host `ConfigPreflight` aggregator for raw `IConfiguration` — so the plumbing is written once and domain rules stay per-project.
**Built at 0.1.0. Not yet adopted by OtOpcUa, MxAccessGateway, or ScadaBridge.** Adoption tracked in `~/Desktop/scadaproj/components/configuration/GAPS.md`. **Built at 0.1.0. Adopted by OtOpcUa, MxAccessGateway, and ScadaBridge on 2026-06-01** (local default branches; not yet pushed to remotes). Adoption tracked in `~/Desktop/scadaproj/components/configuration/GAPS.md`.
--- ---
@@ -66,7 +66,7 @@ ZB.MOM.WW.Configuration/
## Status ## Status
Part of the **scadaproj component-normalization family** — this is the configuration + validation component. Built at **0.1.0**. **Not yet adopted by OtOpcUa, MxAccessGateway, or ScadaBridge** — follow-on adoption is tracked in: Part of the **scadaproj component-normalization family** — this is the configuration + validation component. Built at **0.1.0**. **Adopted by OtOpcUa, MxAccessGateway, and ScadaBridge on 2026-06-01** (local default branches; not yet pushed to remotes) — per-app result is tracked in:
- `~/Desktop/scadaproj/components/configuration/GAPS.md` - `~/Desktop/scadaproj/components/configuration/GAPS.md`
@@ -6,5 +6,6 @@
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
<Version>0.1.0</Version> <Version>0.1.0</Version>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally> <ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup> </PropertyGroup>
</Project> </Project>
+1 -1
View File
@@ -101,7 +101,7 @@ No third-party packages; no ASP.NET Core framework reference.
## Status ## Status
**Built at 0.1.0. Not yet adopted by the three apps.** Adoption is tracked in the component backlog: **Built at 0.1.0. Adopted across all three apps on 2026-06-01** (local default branches; not yet pushed to remotes). Adoption is tracked in the component backlog:
- `~/Desktop/scadaproj/components/configuration/GAPS.md` - `~/Desktop/scadaproj/components/configuration/GAPS.md`
@@ -1,3 +1,5 @@
using System.Globalization;
namespace ZB.MOM.WW.Configuration; namespace ZB.MOM.WW.Configuration;
/// <summary> /// <summary>
@@ -16,11 +18,14 @@ internal static class Checks
/// <summary> /// <summary>
/// Validates a raw string as a TCP port (parse + range), returning <c>null</c> when valid. /// Validates a raw string as a TCP port (parse + range), returning <c>null</c> when valid.
/// Centralizes the port wording for callers that hold the raw config value. /// Centralizes the port wording for callers that hold the raw config value. Parsing is strict
/// and culture-invariant (<see cref="NumberStyles.None"/>): a leading sign or surrounding
/// whitespace is rejected. Both the parse-failure and range-failure messages quote the offending
/// raw value so they read consistently.
/// </summary> /// </summary>
internal static string? PortValue(string? raw, string field) => internal static string? PortValue(string? raw, string field) =>
int.TryParse(raw, out var port) int.TryParse(raw, NumberStyles.None, CultureInfo.InvariantCulture, out var port) && port is >= 1 and <= 65535
? Port(port, field) ? null
: $"{field} must be between 1 and 65535 (was '{raw ?? "null"}')"; : $"{field} must be between 1 and 65535 (was '{raw ?? "null"}')";
/// <summary> /// <summary>
@@ -33,7 +38,7 @@ internal static class Checks
var idx = value.LastIndexOf(':'); var idx = value.LastIndexOf(':');
if (idx <= 0 || idx == value.Length - 1 if (idx <= 0 || idx == value.Length - 1
|| value.AsSpan(0, idx).Contains(':') || value.AsSpan(0, idx).Contains(':')
|| !int.TryParse(value[(idx + 1)..], out var port) || !int.TryParse(value[(idx + 1)..], NumberStyles.None, CultureInfo.InvariantCulture, out var port)
|| port is < 1 or > 65535) || port is < 1 or > 65535)
return $"{field} must be 'host:port' with port 1-65535 (was '{value}')"; return $"{field} must be 'host:port' with port 1-65535 (was '{value}')";
return null; return null;
@@ -1,5 +1,6 @@
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
namespace ZB.MOM.WW.Configuration; namespace ZB.MOM.WW.Configuration;
@@ -33,7 +34,7 @@ public static class ServiceCollectionExtensions
ArgumentNullException.ThrowIfNull(configuration); ArgumentNullException.ThrowIfNull(configuration);
ArgumentException.ThrowIfNullOrWhiteSpace(sectionPath); ArgumentException.ThrowIfNullOrWhiteSpace(sectionPath);
services.AddSingleton<IValidateOptions<TOptions>, TValidator>(); services.TryAddEnumerable(ServiceDescriptor.Singleton<IValidateOptions<TOptions>, TValidator>());
return services.AddOptions<TOptions>() return services.AddOptions<TOptions>()
.Bind(configuration.GetSection(sectionPath)) .Bind(configuration.GetSection(sectionPath))
.ValidateOnStart(); .ValidateOnStart();
@@ -7,7 +7,11 @@
<PackageTags>configuration;options;validation;ivalidateoptions;validateonstart;startup;scada;wonderware;zb-mom-ww</PackageTags> <PackageTags>configuration;options;validation;ivalidateoptions;validateonstart;startup;scada;wonderware;zb-mom-ww</PackageTags>
<PackageProjectUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-configuration</PackageProjectUrl> <PackageProjectUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-configuration</PackageProjectUrl>
<RepositoryUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-configuration</RepositoryUrl> <RepositoryUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-configuration</RepositoryUrl>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup> </PropertyGroup>
<ItemGroup>
<None Include="..\..\README.md" Pack="true" PackagePath="\" />
</ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.Extensions.Options" /> <PackageReference Include="Microsoft.Extensions.Options" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" /> <PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
@@ -44,4 +44,24 @@ public sealed class AddValidatedOptionsTests
Assert.Equal("central", opts.Name); Assert.Equal("central", opts.Name);
await host.StopAsync(); await host.StopAsync();
} }
[Fact]
public void Calling_twice_registers_validator_once()
{
    // Port "0" and an empty Name are the two values expected to fail validation below.
    Dictionary<string, string?> settings = new()
    {
        ["Node:Port"] = "0",
        ["Node:Name"] = "",
    };
    IConfigurationRoot configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(settings)
        .Build();

    ServiceCollection registrations = new();
    registrations.AddValidatedOptions<NodeOptions, NodeValidator>(configuration, "Node");
    registrations.AddValidatedOptions<NodeOptions, NodeValidator>(configuration, "Node");

    using var container = registrations.BuildServiceProvider();

    // The second registration must not add a second validator descriptor.
    IValidateOptions<NodeOptions>[] registeredValidators =
        container.GetServices<IValidateOptions<NodeOptions>>().ToArray();
    Assert.Single(registeredValidators);

    // Materializing the options runs validation; each failure must be reported once, not twice.
    OptionsValidationException failure = Assert.Throws<OptionsValidationException>(
        () => container.GetRequiredService<IOptions<NodeOptions>>().Value);
    Assert.Equal(2, failure.Failures.Count());
}
} }
@@ -0,0 +1,92 @@
using Microsoft.Extensions.Configuration;
using ZB.MOM.WW.Configuration;
namespace ZB.MOM.WW.Configuration.Tests;
/// <summary>
/// Wording regression tests for the failure messages produced by the shared <c>Checks</c> seam,
/// exercised through its public front-ends: <see cref="ConfigPreflight"/> for raw port values and
/// <see cref="ValidationBuilder"/> for host:port endpoints. Tracks Configuration-002 (the offending
/// value is quoted consistently) and Configuration-003 (strict, culture-invariant port parsing).
/// </summary>
public sealed class ChecksWordingTests
{
    private const string PortKey = "X:Port";
    private const string EndpointKey = "X:Endpoint";

    /// <summary>Builds an in-memory configuration that contains exactly one key/value pair.</summary>
    private static IConfiguration SingleEntryConfig(string key, string? value)
    {
        Dictionary<string, string?> entries = new() { [key] = value };
        return new ConfigurationBuilder().AddInMemoryCollection(entries).Build();
    }

    /// <summary>
    /// Runs <c>RequirePort</c> against <paramref name="rawValue"/> and returns the single failure it
    /// is expected to report (the assertion fails when there are zero or several).
    /// </summary>
    private static string SinglePortFailure(string? rawValue)
    {
        var preflight = ConfigPreflight.For(SingleEntryConfig(PortKey, rawValue)).RequirePort(PortKey);
        return Assert.Single(preflight.Failures);
    }

    // Configuration-002: out-of-range and unparsable values must be quoted identically.
    [Fact]
    public void PortValue_range_failure_quotes_the_value() =>
        Assert.Equal("X:Port must be between 1 and 65535 (was '0')", SinglePortFailure("0"));

    [Fact]
    public void PortValue_high_range_failure_quotes_the_value() =>
        Assert.Equal("X:Port must be between 1 and 65535 (was '70000')", SinglePortFailure("70000"));

    [Fact]
    public void PortValue_parse_failure_quotes_the_value() =>
        Assert.Equal("X:Port must be between 1 and 65535 (was 'notaport')", SinglePortFailure("notaport"));

    [Fact]
    public void PortValue_null_failure_renders_null() =>
        Assert.Equal("X:Port must be between 1 and 65535 (was 'null')", SinglePortFailure(null));

    // Configuration-003: a leading sign or surrounding whitespace must not be tolerated.
    [Theory]
    [InlineData("+5000")]
    [InlineData(" 5000")]
    [InlineData("5000 ")]
    [InlineData(" 5000 ")]
    [InlineData("-1")]
    public void PortValue_rejects_loose_inputs(string raw)
    {
        var preflight = ConfigPreflight.For(SingleEntryConfig(PortKey, raw)).RequirePort(PortKey);

        Assert.False(preflight.IsValid);
        string failure = Assert.Single(preflight.Failures);
        Assert.Equal($"X:Port must be between 1 and 65535 (was '{raw}')", failure);
    }

    [Fact]
    public void PortValue_accepts_plain_in_range_port()
    {
        var preflight = ConfigPreflight.For(SingleEntryConfig(PortKey, "5000")).RequirePort(PortKey);

        Assert.True(preflight.IsValid);
    }

    [Theory]
    [InlineData("host:+5000")]
    [InlineData("host: 5000")]
    [InlineData("host:5000 ")]
    public void HostPort_rejects_loose_port_inputs(string value)
    {
        ValidationBuilder builder = new();
        builder.HostPort(value, EndpointKey);

        Assert.False(builder.IsValid);
        string failure = Assert.Single(builder.Failures);
        Assert.Equal($"X:Endpoint must be 'host:port' with port 1-65535 (was '{value}')", failure);
    }

    [Fact]
    public void HostPort_accepts_plain_endpoint()
    {
        ValidationBuilder builder = new();
        builder.HostPort("host:5000", EndpointKey);

        Assert.True(builder.IsValid);
    }
}
@@ -1,6 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<IsPackable>false</IsPackable> <IsPackable>false</IsPackable>
<!-- Test project does not ship; no XML docs required (overrides Directory.Build.props). -->
<GenerateDocumentationFile>false</GenerateDocumentationFile>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="coverlet.collector" /> <PackageReference Include="coverlet.collector" />
@@ -0,0 +1,11 @@
<Project>
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<LangVersion>latest</LangVersion>
<Version>0.1.0</Version>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>
</Project>
@@ -0,0 +1,24 @@
<Project>
<PropertyGroup>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<!-- Library -->
<PackageVersion Include="Microsoft.Data.SqlClient" Version="6.0.2" />
<PackageVersion Include="Grpc.AspNetCore" Version="2.76.0" />
<!-- Google.Protobuf and Grpc.Tools must be >= the minimums Grpc.AspNetCore 2.76.0 requires -->
<PackageVersion Include="Google.Protobuf" Version="3.31.1" />
<PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.0" />
<PackageVersion Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="10.0.0" />
<PackageVersion Include="Grpc.Tools" Version="2.76.0" />
<!-- Test -->
<PackageVersion Include="xunit" Version="2.9.3" />
<PackageVersion Include="xunit.runner.visualstudio" Version="3.1.4" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.14.1" />
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
</ItemGroup>
</Project>
@@ -0,0 +1,8 @@
<Solution>
<Folder Name="/src/">
<Project Path="src/ZB.MOM.WW.GalaxyRepository/ZB.MOM.WW.GalaxyRepository.csproj" />
</Folder>
<Folder Name="/tests/">
<Project Path="tests/ZB.MOM.WW.GalaxyRepository.Tests/ZB.MOM.WW.GalaxyRepository.Tests.csproj" />
</Folder>
</Solution>
@@ -0,0 +1,71 @@
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Routing;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository.DependencyInjection;
/// <summary>
/// Service-registration and endpoint-mapping entry points for the reusable Galaxy Repository
/// library. A host calls <see cref="AddZbGalaxyRepository"/> while configuring services and
/// <see cref="MapZbGalaxyRepository"/> while building its endpoint pipeline.
/// </summary>
public static class GalaxyRepositoryServiceCollectionExtensions
{
    /// <summary>
    /// Binds <see cref="GalaxyRepositoryOptions"/> from <paramref name="sectionPath"/> and registers,
    /// as singletons, the SQL-backed repository, the deploy notifier, the hierarchy snapshot store
    /// and the hierarchy cache, plus the hosted <see cref="GalaxyHierarchyRefreshService"/>.
    /// </summary>
    /// <param name="services">The service collection to add registrations to.</param>
    /// <param name="configuration">The application configuration root.</param>
    /// <param name="sectionPath">
    /// Path of the configuration section that holds the options
    /// (for example <c>MxGateway:Galaxy</c> or <c>HistorianGateway:Galaxy</c>).
    /// </param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> or <paramref name="configuration"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="sectionPath"/> is null, empty or whitespace.</exception>
    public static IServiceCollection AddZbGalaxyRepository(
        this IServiceCollection services,
        IConfiguration configuration,
        string sectionPath)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);
        ArgumentException.ThrowIfNullOrWhiteSpace(sectionPath);

        // Binding only: no validator is registered by this library, so adding .ValidateOnStart()
        // here would do nothing. Validation is left to the consuming application
        // (e.g. the sidecar's ConfigPreflight / validated-options layer).
        IConfigurationSection section = configuration.GetSection(sectionPath);
        services.AddOptions<GalaxyRepositoryOptions>().Bind(section);

        // The concrete repository is built from the bound options; the interface registration
        // forwards to it so both service types resolve to the same singleton.
        services.AddSingleton<GalaxyRepository>(static provider =>
        {
            GalaxyRepositoryOptions options =
                provider.GetRequiredService<IOptions<GalaxyRepositoryOptions>>().Value;
            return new GalaxyRepository(options);
        });

        return services
            .AddSingleton<IGalaxyRepository>(static provider => provider.GetRequiredService<GalaxyRepository>())
            .AddSingleton<IGalaxyDeployNotifier, GalaxyDeployNotifier>()
            .AddSingleton<IGalaxyHierarchySnapshotStore, GalaxyHierarchySnapshotStore>()
            .AddSingleton<IGalaxyHierarchyCache, GalaxyHierarchyCache>()
            .AddHostedService<GalaxyHierarchyRefreshService>();
    }

    /// <summary>
    /// Maps <see cref="GalaxyRepositoryGrpcService"/> onto the host's endpoint pipeline. Intended to
    /// be called after <see cref="AddZbGalaxyRepository"/> and after gRPC has been added to the
    /// application's services.
    /// </summary>
    /// <param name="endpoints">The endpoint route builder to map the gRPC service onto.</param>
    /// <returns>The same <paramref name="endpoints"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="endpoints"/> is null.</exception>
    public static IEndpointRouteBuilder MapZbGalaxyRepository(this IEndpointRouteBuilder endpoints)
    {
        ArgumentNullException.ThrowIfNull(endpoints);

        endpoints.MapGrpcService<GalaxyRepositoryGrpcService>();
        return endpoints;
    }
}
@@ -0,0 +1,41 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Immutable-after-initialization data carrier for a single row returned by
/// <see cref="GalaxyRepository.GetAttributesAsync"/>. String members default to the empty string;
/// <see cref="DataTypeName"/> and <see cref="ArrayDimension"/> are optional and default to null.
/// </summary>
public sealed class GalaxyAttributeRow
{
    /// <summary>Identifier of the Galaxy object that owns the attribute.</summary>
    public int GobjectId { get; init; }

    /// <summary>Tag name of the owning object.</summary>
    public string TagName { get; init; } = "";

    /// <summary>Name of the attribute.</summary>
    public string AttributeName { get; init; } = "";

    /// <summary>Full tag reference of the attribute.</summary>
    public string FullTagReference { get; init; } = "";

    /// <summary>MXAccess data type code.</summary>
    public int MxDataType { get; init; }

    /// <summary>Name of the data type, when one is available.</summary>
    public string? DataTypeName { get; init; }

    /// <summary>Whether the attribute is an array.</summary>
    public bool IsArray { get; init; }

    /// <summary>Array dimension, when applicable.</summary>
    public int? ArrayDimension { get; init; }

    /// <summary>MXAccess attribute category code.</summary>
    public int MxAttributeCategory { get; init; }

    /// <summary>Security classification code.</summary>
    public int SecurityClassification { get; init; }

    /// <summary>Whether the attribute is historized.</summary>
    public bool IsHistorized { get; init; }

    /// <summary>Whether the attribute is an alarm.</summary>
    public bool IsAlarm { get; init; }
}
@@ -0,0 +1,19 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Result of one <see cref="GalaxyBrowseProjector.ProjectChildren"/> call: a materialized page of
/// the requested parent's direct children, a has-children hint for each returned child, the total
/// number of matching siblings across all pages, and the filter signature the page was computed
/// under.
/// </summary>
/// <param name="Children">The page of direct children, sorted areas-first then by display name.</param>
/// <param name="ChildHasChildren">
/// Parallel to <paramref name="Children"/> (same length, same index): whether the child at that
/// index has at least one matching descendant under the same filter set.
/// </param>
/// <param name="TotalChildCount">
/// Total matching direct children of the parent (post-filter); independent of the page's offset
/// and size.
/// </param>
/// <param name="FilterSignature">Stable signature of the filter and parent selector, used to bind page tokens.</param>
public sealed record GalaxyBrowseChildrenResult(
IReadOnlyList<GalaxyObject> Children,
IReadOnlyList<bool> ChildHasChildren,
int TotalChildCount,
string FilterSignature);
@@ -0,0 +1,281 @@
using System.Collections.Concurrent;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Text;
using Grpc.Core;
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Projects one level of children of a parent object out of an immutable
/// <see cref="GalaxyHierarchyCacheEntry"/>. Pure and side-effect free. Memoizes the
/// filtered child list per cache-entry instance so repeated paging is an O(pageSize)
/// slice rather than an O(siblings) filter scan per page. The memo is keyed on the
/// immutable cache entry, so when the cache publishes a new entry the stale memo
/// becomes unreachable and is reclaimed with it.
/// </summary>
public static class GalaxyBrowseProjector
{
private static readonly ConditionalWeakTable<
GalaxyHierarchyCacheEntry,
ConcurrentDictionary<string, FilteredChildren>> FilteredChildrenCache = new();
/// <summary>
/// Returns one page of the direct children of the parent selected by <paramref name="request"/>,
/// after the request filters and the optional browse-subtree constraints have been applied.
/// Returned objects are clones, so callers may mutate them without touching the cache entry.
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry to query.</param>
/// <param name="request">The browse-children request (parent selector and filters).</param>
/// <param name="browseSubtreeGlobs">Optional API-key browse-subtree constraints; null or empty means unrestricted.</param>
/// <param name="offset">Zero-based offset into the filtered child list.</param>
/// <param name="pageSize">Maximum number of children to return.</param>
/// <returns>
/// The page, its parallel has-children flags, the total filtered child count and the filter
/// signature. The page is empty when <paramref name="offset"/> is at or beyond the total.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> or <paramref name="request"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is negative or <paramref name="pageSize"/> is not positive.</exception>
/// <exception cref="RpcException">The requested parent does not exist (<see cref="StatusCode.NotFound"/>).</exception>
public static GalaxyBrowseChildrenResult ProjectChildren(
    GalaxyHierarchyCacheEntry entry,
    BrowseChildrenRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int offset,
    int pageSize)
{
    ArgumentNullException.ThrowIfNull(entry);
    ArgumentNullException.ThrowIfNull(request);
    if (offset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(offset), offset, "Offset must be greater than or equal to zero.");
    }

    if (pageSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pageSize), pageSize, "Page size must be greater than zero.");
    }

    int parentId = ResolveParentId(entry, request);
    string signature = ComputeFilterSignature(request, browseSubtreeGlobs, parentId);
    FilteredChildren filtered = GetFilteredChildren(entry, request, browseSubtreeGlobs, parentId, signature);

    // Clamp the exclusive end of the page to the list; the sum is done in 64-bit so that
    // offset + pageSize cannot overflow.
    int total = filtered.Children.Count;
    long requestedEnd = (long)offset + pageSize;
    int stop = requestedEnd < total ? (int)requestedEnd : total;
    int capacity = stop > offset ? stop - offset : 0;
    bool keepAttributes = IncludeAttributes(request);

    List<GalaxyObject> page = new(capacity);
    List<bool> childFlags = new(capacity);
    for (int position = offset; position < stop; position++)
    {
        page.Add(CloneObject(filtered.Children[position].Object, keepAttributes));
        childFlags.Add(filtered.HasMatchingDescendant[position]);
    }

    return new GalaxyBrowseChildrenResult(page, childFlags, total, signature);
}
/// <summary>
/// Resolves the request's parent oneof to a gobject id. Public so the gRPC handler can compute
/// the same parent id (needed for the page-token signature) without reimplementing the
/// resolution rules.
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry to query.</param>
/// <param name="request">The browse-children request.</param>
/// <returns>
/// The parent's gobject id, or <c>0</c> (the root level) when no parent is set, when the parent
/// id is explicitly <c>0</c>, or when the oneof case is not one handled here.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> or <paramref name="request"/> is null.</exception>
/// <exception cref="RpcException">
/// The selected parent is not present in the entry's index (<see cref="StatusCode.NotFound"/>).
/// </exception>
public static int ResolveParentId(GalaxyHierarchyCacheEntry entry, BrowseChildrenRequest request)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException further down;
    // ProjectChildren applies the same guards.
    ArgumentNullException.ThrowIfNull(entry);
    ArgumentNullException.ThrowIfNull(request);

    switch (request.ParentCase)
    {
        case BrowseChildrenRequest.ParentOneofCase.ParentGobjectId:
            // Id 0 is the root level and never needs a lookup.
            if (request.ParentGobjectId == 0)
            {
                return 0;
            }

            if (!entry.Index.ObjectViewsById.ContainsKey(request.ParentGobjectId))
            {
                throw ParentNotFound();
            }

            return request.ParentGobjectId;

        case BrowseChildrenRequest.ParentOneofCase.ParentTagName:
        {
            if (!entry.Index.ObjectViewsByTagName.TryGetValue(request.ParentTagName, out GalaxyObjectView? match))
            {
                throw ParentNotFound();
            }

            return match.Object.GobjectId;
        }

        case BrowseChildrenRequest.ParentOneofCase.ParentContainedPath:
        {
            if (!entry.Index.ObjectViewsByContainedPath.TryGetValue(request.ParentContainedPath, out GalaxyObjectView? match))
            {
                throw ParentNotFound();
            }

            return match.Object.GobjectId;
        }

        case BrowseChildrenRequest.ParentOneofCase.None:
        default:
            return 0;
    }

    // Single source for the NotFound status so all three selectors report identically.
    static RpcException ParentNotFound() =>
        new(new Status(StatusCode.NotFound, "BrowseChildren parent was not found."));
}
/// <summary>
/// Returns the filtered direct children of <paramref name="parentId"/>, computing them at most
/// once per (cache entry, filter signature) pair in the common case. The memo is attached to
/// the entry through a <see cref="ConditionalWeakTable{TKey,TValue}"/>.
/// </summary>
private static FilteredChildren GetFilteredChildren(
    GalaxyHierarchyCacheEntry entry,
    BrowseChildrenRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int parentId,
    string filterSignature)
{
    ConcurrentDictionary<string, FilteredChildren> perEntryMemo = FilteredChildrenCache.GetValue(
        entry,
        static _ => new ConcurrentDictionary<string, FilteredChildren>(StringComparer.Ordinal));

    // State is passed explicitly so the factory can stay static (no closure allocation).
    return perEntryMemo.GetOrAdd(
        filterSignature,
        static (_, args) => BuildFilteredChildren(args.Entry, args.Request, args.BrowseSubtreeGlobs, args.ParentId),
        (Entry: entry, Request: request, BrowseSubtreeGlobs: browseSubtreeGlobs, ParentId: parentId));
}

/// <summary>
/// Scans the direct children of <paramref name="parentId"/> and keeps every child that passes
/// the browse-subtree constraint and either matches the filters itself or has a matching
/// descendant. The recorded flag for a kept child is whether it has a matching descendant.
/// </summary>
private static FilteredChildren BuildFilteredChildren(
    GalaxyHierarchyCacheEntry entry,
    BrowseChildrenRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int parentId)
{
    if (!entry.Index.ChildrenByParent.TryGetValue(parentId, out IReadOnlyList<GalaxyObjectView>? siblings))
    {
        siblings = Array.Empty<GalaxyObjectView>();
    }

    List<GalaxyObjectView> kept = [];
    List<bool> keptHasDescendant = [];
    foreach (GalaxyObjectView child in siblings)
    {
        if (!MatchesBrowseSubtrees(child, browseSubtreeGlobs))
        {
            continue;
        }

        // A child that fails the filters is still surfaced when something beneath it matches,
        // mirroring the dashboard browse-tree semantics (ancestor shown with has-children true).
        bool selfMatches = MatchesFilters(child.Object, request);
        bool descendantMatches = HasMatchingDescendant(child, entry.Index, request, browseSubtreeGlobs);
        if (selfMatches || descendantMatches)
        {
            kept.Add(child);
            keptHasDescendant.Add(descendantMatches);
        }
    }

    return new FilteredChildren(kept, keptHasDescendant);
}
/// <summary>
/// Determines whether any descendant of <paramref name="parent"/> (at any depth) passes both the
/// browse-subtree constraint and the request filters. The parent itself is not tested.
/// </summary>
private static bool HasMatchingDescendant(
    GalaxyObjectView parent,
    GalaxyHierarchyIndex index,
    BrowseChildrenRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs)
{
    // Track visited ids so the walk terminates even if ChildrenByParent contains a cycle
    // (e.g. a corrupt A→B→A chain in the Galaxy data).
    HashSet<int> seen = [parent.Object.GobjectId];
    Stack<GalaxyObjectView> pending = new();

    PushUnseenChildren(parent);
    while (pending.TryPop(out GalaxyObjectView? node))
    {
        if (MatchesBrowseSubtrees(node, browseSubtreeGlobs) && MatchesFilters(node.Object, request))
        {
            return true;
        }

        PushUnseenChildren(node);
    }

    return false;

    // Queues every not-yet-visited direct child of the given node.
    void PushUnseenChildren(GalaxyObjectView owner)
    {
        if (!index.ChildrenByParent.TryGetValue(owner.Object.GobjectId, out IReadOnlyList<GalaxyObjectView>? kids))
        {
            return;
        }

        foreach (GalaxyObjectView kid in kids)
        {
            if (seen.Add(kid.Object.GobjectId))
            {
                pending.Push(kid);
            }
        }
    }
}
/// <summary>
/// Returns true when no browse-subtree constraint is in force (null or empty list) or when the
/// view's contained path matches at least one of the globs.
/// </summary>
private static bool MatchesBrowseSubtrees(GalaxyObjectView view, IReadOnlyList<string>? browseSubtreeGlobs)
{
    if (browseSubtreeGlobs is null || browseSubtreeGlobs.Count == 0)
    {
        return true;
    }

    foreach (string subtreeGlob in browseSubtreeGlobs)
    {
        if (GalaxyGlobMatcher.IsMatch(view.ContainedPath, subtreeGlob))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Applies every filter carried by <paramref name="request"/> to <paramref name="obj"/>. All
/// active filters must pass: category membership, every template-chain substring
/// (case-insensitive), the tag-name glob, and the alarm-bearing / historized attribute flags.
/// A filter that is unset (empty list, blank glob, false flag) is skipped.
/// </summary>
private static bool MatchesFilters(GalaxyObject obj, BrowseChildrenRequest request)
{
    // Evaluated left to right with short-circuiting, cheapest checks first.
    return (request.CategoryIds.Count == 0 || request.CategoryIds.Contains(obj.CategoryId))
        && request.TemplateChainContains.All(
            wanted => obj.TemplateChain.Any(
                template => template.Contains(wanted, StringComparison.OrdinalIgnoreCase)))
        && (string.IsNullOrWhiteSpace(request.TagNameGlob)
            || GalaxyGlobMatcher.IsMatch(obj.TagName, request.TagNameGlob))
        && (!request.AlarmBearingOnly || obj.Attributes.Any(attribute => attribute.IsAlarm))
        && (!request.HistorizedOnly || obj.Attributes.Any(attribute => attribute.IsHistorized));
}
/// <summary>
/// Attributes are included unless the request explicitly sets <c>IncludeAttributes</c> to false;
/// an unset field means "include".
/// </summary>
private static bool IncludeAttributes(BrowseChildrenRequest request) =>
    request.HasIncludeAttributes ? request.IncludeAttributes : true;
/// <summary>
/// Returns a clone of <paramref name="source"/>, with its attribute list emptied when
/// <paramref name="includeAttributes"/> is false. The source object is never modified.
/// </summary>
private static GalaxyObject CloneObject(GalaxyObject source, bool includeAttributes)
{
    GalaxyObject copy = source.Clone();
    if (includeAttributes)
    {
        return copy;
    }

    copy.Attributes.Clear();
    return copy;
}
/// <summary>
/// Computes a stable signature of the parent selector, the request filters and the browse-subtree
/// constraints. Used as the memoization key for filtered child lists.
/// </summary>
/// <param name="request">The browse-children request.</param>
/// <param name="browseSubtreeGlobs">Optional API-key browse-subtree constraints; null is treated as empty.</param>
/// <param name="parentId">Resolved parent gobject id (0 for roots).</param>
/// <returns>
/// The first 12 bytes of the SHA-256 hash of the canonical filter text, as 24 uppercase
/// hexadecimal characters.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public static string ComputeFilterSignature(
    BrowseChildrenRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int parentId)
{
    ArgumentNullException.ThrowIfNull(request);

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    StringBuilder builder = new();
    builder.Append("parent=").Append(parentId.ToString(invariant));

    // Format ids explicitly: AppendJoin would otherwise call the culture-sensitive ToString(),
    // so a negative id could render differently depending on the thread's current culture.
    builder.Append("|cat=").AppendJoin(',', request.CategoryIds.Order().Select(id => id.ToString(invariant)));

    // List-valued parts are sorted so that the order in which a client lists them is irrelevant.
    builder.Append("|tpl=").AppendJoin(',', request.TemplateChainContains.Order(StringComparer.OrdinalIgnoreCase));
    builder.Append("|glob=").Append(request.TagNameGlob);
    builder.Append("|attrs=").Append(request.HasIncludeAttributes ? request.IncludeAttributes.ToString() : "unset");
    builder.Append("|alarm=").Append(request.AlarmBearingOnly);
    builder.Append("|hist=").Append(request.HistorizedOnly);
    builder.Append("|browse=").AppendJoin(',', (browseSubtreeGlobs ?? Array.Empty<string>()).Order(StringComparer.OrdinalIgnoreCase));

    // NOTE(review): values are joined without escaping, so a value containing ',' or '|' can
    // produce the same text as a different filter set. Left unchanged because altering the
    // format would change every signature; confirm whether that is acceptable before fixing.
    byte[] hash = SHA256.HashData(Encoding.UTF8.GetBytes(builder.ToString()));
    return Convert.ToHexString(hash, 0, 12);
}
private sealed record FilteredChildren(
IReadOnlyList<GalaxyObjectView> Children,
IReadOnlyList<bool> HasMatchingDescendant);
}
@@ -0,0 +1,18 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Freshness state of the shared Galaxy hierarchy cache entry. The numeric values are assigned
/// explicitly; <see cref="Unknown"/> is the zero/default value.
/// </summary>
public enum GalaxyCacheStatus
{
/// <summary>Cache has never completed a refresh.</summary>
Unknown = 0,
/// <summary>Cache holds data from a recent successful refresh.</summary>
Healthy = 1,
/// <summary>
/// Cache holds data, but the most recent refresh attempt failed or no successful refresh has
/// happened within the staleness threshold.
/// </summary>
Stale = 2,
/// <summary>Latest refresh failed and no prior data is available.</summary>
Unavailable = 3,
}
@@ -0,0 +1,19 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// A single Galaxy deploy notification. Published by <see cref="GalaxyHierarchyCache"/>
/// whenever a refresh detects that <c>galaxy.time_of_last_deploy</c> has changed (or on
/// the first successful refresh). Consumed by <see cref="IGalaxyDeployNotifier"/>
/// subscribers (the streaming gRPC RPC). Immutable positional record with value equality.
/// </summary>
/// <param name="Sequence">
/// Monotonically increasing per process start; gaps indicate dropped events (a subscriber's
/// bounded queue discards its oldest entry when full).
/// </param>
/// <param name="ObservedAt">Server wall-clock when the cache observed the deploy.</param>
/// <param name="TimeOfLastDeploy">The <c>galaxy.time_of_last_deploy</c> value, or <see langword="null"/> when the Galaxy table reports none.</param>
/// <param name="ObjectCount">Number of objects in the hierarchy at the time of the event.</param>
/// <param name="AttributeCount">Number of attributes in the hierarchy at the time of the event.</param>
public sealed record GalaxyDeployEventInfo(
long Sequence,
DateTimeOffset ObservedAt,
DateTimeOffset? TimeOfLastDeploy,
int ObjectCount,
int AttributeCount);
@@ -0,0 +1,79 @@
using System.Collections.Concurrent;
using System.Runtime.CompilerServices;
using System.Threading.Channels;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Channel-based fan-out of Galaxy deploy events to streaming gRPC subscribers. Each
/// subscriber gets a private bounded channel so a slow client cannot back-pressure
/// other subscribers or the publisher. When a subscriber's channel is full the oldest
/// event is dropped — clients use the sequence field to detect gaps. The most recent
/// event is retained and replayed to every new subscriber.
/// </summary>
public sealed class GalaxyDeployNotifier : IGalaxyDeployNotifier
{
// Maximum number of undelivered events buffered per subscriber before the oldest is dropped.
private const int SubscriberQueueCapacity = 16;
// Active subscribers, keyed by a per-subscription id generated in SubscribeAsync.
private readonly ConcurrentDictionary<Guid, Channel<GalaxyDeployEventInfo>> _subscribers = new();
// Last published event; accessed only through Volatile.Read/Volatile.Write.
private GalaxyDeployEventInfo? _latest;
/// <summary>
/// The most recent deploy event, or null if none has been published.
/// </summary>
public GalaxyDeployEventInfo? Latest => Volatile.Read(ref _latest);
/// <inheritdoc />
/// <remarks>
/// Records <paramref name="info"/> as the latest event, then offers it to every currently
/// registered subscriber channel without waiting.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
public void Publish(GalaxyDeployEventInfo info)
{
ArgumentNullException.ThrowIfNull(info);
// Store first so a subscriber registering from now on sees this event as its bootstrap.
Volatile.Write(ref _latest, info);
foreach (Channel<GalaxyDeployEventInfo> channel in _subscribers.Values)
{
// BoundedChannelFullMode.DropOldest -> writes never wait; we only fail if the
// channel was completed by the subscriber side, which we ignore.
channel.Writer.TryWrite(info);
}
}
/// <inheritdoc />
/// <remarks>
/// This is an async iterator, so the subscription is registered when enumeration starts, not
/// when the method is called. The subscription is removed when enumeration ends, is cancelled
/// through <paramref name="cancellationToken"/>, or the enumerator is disposed.
/// </remarks>
public async IAsyncEnumerable<GalaxyDeployEventInfo> SubscribeAsync(
[EnumeratorCancellation] CancellationToken cancellationToken)
{
Guid subscriberId = Guid.NewGuid();
Channel<GalaxyDeployEventInfo> channel = Channel.CreateBounded<GalaxyDeployEventInfo>(
new BoundedChannelOptions(SubscriberQueueCapacity)
{
FullMode = BoundedChannelFullMode.DropOldest,
SingleReader = true,
SingleWriter = false,
});
// Register before reading _latest.
// NOTE(review): with this ordering a publish that races with subscription appears to be
// delivered twice (once by Publish, once as the bootstrap) rather than missed — confirm
// that subscribers tolerate a repeated sequence number.
_subscribers[subscriberId] = channel;
// Bootstrap: emit the latest known event so subscribers don't need to wait for
// the next deploy to know current state.
GalaxyDeployEventInfo? bootstrap = Volatile.Read(ref _latest);
if (bootstrap is not null)
{
channel.Writer.TryWrite(bootstrap);
}
try
{
// Outer loop waits for data (or cancellation); inner loop drains everything buffered.
while (await channel.Reader.WaitToReadAsync(cancellationToken).ConfigureAwait(false))
{
while (channel.Reader.TryRead(out GalaxyDeployEventInfo? next))
{
yield return next;
}
}
}
finally
{
// Unregister first so Publish stops targeting this channel, then complete the writer.
_subscribers.TryRemove(subscriberId, out _);
channel.Writer.TryComplete();
}
}
}
@@ -0,0 +1,131 @@
using System.Collections.Concurrent;
using System.Text;
using System.Text.RegularExpressions;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Anchored, case-insensitive glob matcher (<c>*</c> and <c>?</c> wildcards) used by the
/// hierarchy and browse projectors to filter object tag names and browse subtrees.
/// <c>*</c> matches any run of characters (including none) and <c>?</c> matches exactly one
/// character; every other character is matched literally. The whole value must match.
/// Compiled regexes are cached and the cache is bounded so an unbounded stream of distinct
/// client-supplied globs cannot grow memory without limit.
/// </summary>
public static class GalaxyGlobMatcher
{
    /// <summary>
    /// Maximum number of compiled-regex entries retained in <see cref="RegexCache"/>.
    /// The cache is keyed by glob pattern and patterns flow in from two sources:
    /// admin-controlled API-key constraints (naturally bounded) and the
    /// client-supplied <c>DiscoverHierarchyRequest.TagNameGlob</c> (unbounded — a
    /// client can iterate through generated names and create millions of distinct
    /// globs over the process lifetime). Capping the cache bounds memory while
    /// keeping the hot working set hit-cached.
    /// </summary>
    internal const int RegexCacheCapacity = 256;

    /// <summary>Upper bound on the time a single regex evaluation may take.</summary>
    private static readonly TimeSpan MatchTimeout = TimeSpan.FromMilliseconds(100);

    /// <summary>
    /// Bounded compiled-regex cache keyed by the raw glob pattern (ordinal comparison). Beyond
    /// <see cref="RegexCacheCapacity"/> entries the oldest insertion is evicted. Eviction is
    /// approximate (FIFO over insertion order, not true LRU) because only the bound matters,
    /// not exact recency tracking.
    /// </summary>
    private static readonly ConcurrentDictionary<string, Regex> RegexCache = new(StringComparer.Ordinal);

    /// <summary>
    /// Insertion-order queue used to pick the eviction victim. Keeping it separate leaves
    /// <see cref="RegexCache"/> reads lock-free; <see cref="EvictionLock"/> guards only the
    /// eviction path.
    /// </summary>
    private static readonly ConcurrentQueue<string> InsertionOrder = new();

    private static readonly object EvictionLock = new();

    /// <summary>
    /// Current cache size, exposed for tests asserting the cap is honoured.
    /// </summary>
    internal static int CurrentCacheSize => RegexCache.Count;

    /// <summary>Determines whether a value matches a glob pattern (with * and ? wildcards).</summary>
    /// <param name="value">The value to test against the glob pattern; null is treated as empty.</param>
    /// <param name="glob">The glob pattern; a null, empty or whitespace-only glob matches everything.</param>
    /// <returns>True when the entire value matches the glob, ignoring case.</returns>
    /// <exception cref="RegexMatchTimeoutException">Evaluating the pattern exceeded the match timeout.</exception>
    public static bool IsMatch(string value, string glob)
    {
        if (string.IsNullOrWhiteSpace(glob))
        {
            return true;
        }

        return GetOrCreateRegex(glob).IsMatch(value ?? string.Empty);
    }

    /// <summary>Returns the cached regex for <paramref name="glob"/>, compiling and caching it on a miss.</summary>
    private static Regex GetOrCreateRegex(string glob)
    {
        if (RegexCache.TryGetValue(glob, out Regex? cached))
        {
            return cached;
        }

        // Singleline lets '.' (and therefore '*' and '?') match a newline as well, so glob
        // wildcards really do mean "any character".
        Regex compiled = new(
            BuildRegex(glob),
            RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled,
            MatchTimeout);

        // GetOrAdd atomically returns whichever instance ends up in the cache — ours if we won
        // the race, the other thread's if we lost. It also avoids a TryAdd-then-indexer
        // sequence, where the key could be evicted between the two calls.
        Regex winner = RegexCache.GetOrAdd(glob, compiled);
        if (ReferenceEquals(winner, compiled))
        {
            // We were the inserter — track for FIFO eviction and bound the cache.
            InsertionOrder.Enqueue(glob);
            EvictIfOverCapacity();
        }

        return winner;
    }

    /// <summary>Removes the oldest insertions until the cache is back within its capacity.</summary>
    private static void EvictIfOverCapacity()
    {
        if (RegexCache.Count <= RegexCacheCapacity)
        {
            return;
        }

        // Serialize eviction so two threads do not race past the cap together.
        lock (EvictionLock)
        {
            while (RegexCache.Count > RegexCacheCapacity && InsertionOrder.TryDequeue(out string? oldest))
            {
                RegexCache.TryRemove(oldest, out _);
            }
        }
    }

    /// <summary>
    /// Translates a glob into an anchored regular expression. A run of consecutive <c>*</c> is
    /// emitted as a single <c>.*</c>: it matches the same strings but gives the backtracking
    /// engine far fewer ways to split the input.
    /// </summary>
    private static string BuildRegex(string glob)
    {
        StringBuilder builder = new("^", glob.Length + 3);
        bool previousWasStar = false;
        foreach (char character in glob)
        {
            if (character == '*')
            {
                if (!previousWasStar)
                {
                    builder.Append(".*");
                }

                previousWasStar = true;
                continue;
            }

            previousWasStar = false;
            if (character == '?')
            {
                builder.Append('.');
            }
            else
            {
                builder.Append(Regex.Escape(character.ToString()));
            }
        }

        // \z anchors at the very end of the input; '$' would also match before a trailing newline.
        builder.Append(@"\z");
        return builder.ToString();
    }
}
@@ -0,0 +1,365 @@
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Server-side cache of Galaxy Repository browse data. All gRPC clients share the same
/// entry — the materialized object list is produced once per refresh and reused across
/// requests. Refreshes are deploy-time gated: every tick queries
/// <c>galaxy.time_of_last_deploy</c> (cheap), and the heavy hierarchy + attributes rowsets
/// are pulled only when that timestamp has advanced.
/// Each successful heavy refresh is persisted to disk through
/// <see cref="IGalaxyHierarchySnapshotStore"/>; the first refresh restores that
/// snapshot (as <see cref="GalaxyCacheStatus.Stale"/>) so clients can browse
/// last-known data when the Galaxy database is unreachable on a cold start.
/// </summary>
public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache, IDisposable
{
private static readonly TimeSpan StaleThreshold = TimeSpan.FromMinutes(5);
private readonly IGalaxyRepository _repository;
private readonly IGalaxyDeployNotifier _notifier;
private readonly IGalaxyHierarchySnapshotStore? _snapshotStore;
private readonly TimeProvider _timeProvider;
private readonly ILogger<GalaxyHierarchyCache>? _logger;
private readonly TaskCompletionSource _firstLoad = new(TaskCreationOptions.RunContinuationsAsynchronously);
private readonly SemaphoreSlim _refreshGate = new(1, 1);
private GalaxyHierarchyCacheEntry _current = GalaxyHierarchyCacheEntry.Empty;
private bool _restoreAttempted;
/// <summary>Initializes a new instance of the <see cref="GalaxyHierarchyCache"/> class.</summary>
/// <param name="repository">Galaxy Repository client for SQL queries. Required.</param>
/// <param name="notifier">Galaxy deploy event notifier. Required.</param>
/// <param name="timeProvider">Provider for current time; defaults to <see cref="TimeProvider.System"/>.</param>
/// <param name="logger">Optional logger for diagnostic output.</param>
/// <param name="snapshotStore">
/// Optional on-disk snapshot store. When supplied, the cache persists each
/// successful refresh and restores the last snapshot on first load.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="repository"/> or <paramref name="notifier"/> is null.
/// </exception>
public GalaxyHierarchyCache(
    IGalaxyRepository repository,
    IGalaxyDeployNotifier notifier,
    TimeProvider? timeProvider = null,
    ILogger<GalaxyHierarchyCache>? logger = null,
    IGalaxyHierarchySnapshotStore? snapshotStore = null)
{
    // Reject missing required dependencies here rather than failing later with a
    // NullReferenceException on first use.
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(notifier);

    _repository = repository;
    _notifier = notifier;
    _timeProvider = timeProvider ?? TimeProvider.System;
    _logger = logger;
    _snapshotStore = snapshotStore;
}
/// <summary>
/// Gets the most recently published cache entry, with its status re-evaluated against the
/// current time. When the re-evaluated status differs from the stored one, a copy carrying
/// the re-evaluated status is returned; otherwise the stored instance itself is returned.
/// </summary>
public GalaxyHierarchyCacheEntry Current
{
    get
    {
        GalaxyHierarchyCacheEntry published = Volatile.Read(ref _current);
        GalaxyCacheStatus effective = ProjectStatus(published);
        if (effective != published.Status)
        {
            return published with { Status = effective };
        }

        return published;
    }
}
/// <summary>
/// Runs one refresh pass. Concurrent callers are serialized through the refresh gate, so
/// only one pass executes at a time.
/// </summary>
/// <param name="cancellationToken">Token that cancels both the wait for the gate and the refresh itself.</param>
/// <returns>A task that completes when this caller's refresh pass has finished.</returns>
public async Task RefreshAsync(CancellationToken cancellationToken)
{
    SemaphoreSlim gate = _refreshGate;
    await gate.WaitAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        await RefreshCoreAsync(cancellationToken).ConfigureAwait(false);
    }
    finally
    {
        // Always hand the gate back, including when the refresh throws or is cancelled.
        gate.Release();
    }
}
/// <summary>
/// Waits until the cache has produced its first outcome (restored snapshot, successful
/// refresh, or failed refresh), or until the wait is cancelled.
/// </summary>
/// <param name="cancellationToken">Token that cancels the wait (not the load itself).</param>
/// <returns>A task that completes once the first-load signal has been set.</returns>
public Task WaitForFirstLoadAsync(CancellationToken cancellationToken) =>
    _firstLoad.Task.WaitAsync(cancellationToken);
/// <summary>
/// Releases the refresh gate. The cache is registered as a DI singleton and is therefore
/// disposed once at host shutdown, after <see cref="GalaxyHierarchyRefreshService"/> has
/// stopped, so no refresh can still be holding the gate at that point.
/// </summary>
public void Dispose() => _refreshGate.Dispose();
/// <summary>
/// Performs one refresh pass; invoked by <see cref="RefreshAsync"/> while it holds the
/// refresh gate. On the first pass the on-disk snapshot is restored first. The deploy
/// timestamp is then queried, and the hierarchy and attribute rowsets are reloaded only
/// when there is no prior data or the timestamp differs from the cached one.
/// Non-cancellation failures are recorded on the entry (Stale when prior data exists,
/// otherwise Unavailable) instead of being thrown.
/// </summary>
/// <param name="cancellationToken">Token to cancel the refresh; cancellation is rethrown to the caller.</param>
private async Task RefreshCoreAsync(CancellationToken cancellationToken)
{
// First refresh only: seed the cache from the on-disk snapshot before
// querying SQL, so a cold start with an unreachable Galaxy database can
// still serve last-known browse data. Runs under the refresh gate.
if (!_restoreAttempted)
{
// The flag is set before the attempt, so a restore that throws (cancellation)
// is not retried on the next pass.
_restoreAttempted = true;
await TryRestoreFromDiskAsync(cancellationToken).ConfigureAwait(false);
}
// Captured after the restore so a restored snapshot counts as prior data below.
GalaxyHierarchyCacheEntry previous = Volatile.Read(ref _current);
DateTimeOffset queriedAt = _timeProvider.GetUtcNow();
try
{
DateTime? deployRaw = await _repository.GetLastDeployTimeAsync(cancellationToken).ConfigureAwait(false);
// SpecifyKind only stamps the value as UTC; it does not convert it.
// NOTE(review): assumes the repository returns the deploy time already in UTC — confirm.
DateTimeOffset? deployTime = deployRaw.HasValue
? new DateTimeOffset(DateTime.SpecifyKind(deployRaw.Value, DateTimeKind.Utc))
: null;
bool hasPriorData = previous.HasData;
// Without prior data the heavy load always runs, whatever the deploy time is.
bool deployChanged = !hasPriorData || deployTime != previous.LastDeployTime;
if (!deployChanged)
{
// No deploy change — skip heavy queries; just bump LastSuccessAt.
// Objects, Index and Sequence are carried over unchanged; a restored (Stale)
// entry is promoted to Healthy here.
GalaxyHierarchyCacheEntry refreshed = previous with
{
Status = GalaxyCacheStatus.Healthy,
LastQueriedAt = queriedAt,
LastSuccessAt = queriedAt,
LastError = null,
};
Volatile.Write(ref _current, refreshed);
_firstLoad.TrySetResult();
return;
}
// Both rowsets are requested before either is awaited, so the queries can overlap.
Task<List<GalaxyHierarchyRow>> hierarchyTask = _repository.GetHierarchyAsync(cancellationToken);
Task<List<GalaxyAttributeRow>> attributesTask = _repository.GetAttributesAsync(cancellationToken);
await Task.WhenAll(hierarchyTask, attributesTask).ConfigureAwait(false);
// Both tasks have completed successfully at this point, so .Result does not block.
List<GalaxyHierarchyRow> hierarchy = hierarchyTask.Result;
List<GalaxyAttributeRow> attributes = attributesTask.Result;
long nextSequence = previous.Sequence + 1;
GalaxyHierarchyCacheEntry next = BuildEntry(
status: GalaxyCacheStatus.Healthy,
sequence: nextSequence,
lastQueriedAt: queriedAt,
lastSuccessAt: queriedAt,
lastDeployTime: deployTime,
lastError: null,
hierarchy: hierarchy,
attributes: attributes);
// Order matters: publish the entry, release first-load waiters, notify, then persist.
Volatile.Write(ref _current, next);
_firstLoad.TrySetResult();
// NOTE(review): if Publish throws, the catch block below overwrites the entry just
// written with one derived from `previous`, discarding the fresh data — confirm that
// Publish cannot throw, or guard it separately.
_notifier.Publish(new GalaxyDeployEventInfo(
Sequence: nextSequence,
ObservedAt: queriedAt,
TimeOfLastDeploy: deployTime,
ObjectCount: hierarchy.Count,
AttributeCount: attributes.Count));
await PersistSnapshotAsync(deployTime, queriedAt, hierarchy, attributes, cancellationToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (Exception exception)
{
// Catch every non-cancellation failure — not just SqlException /
// InvalidOperationException. A TimeoutException or Win32Exception
// from connection establishment, or another DbException subtype,
// must still degrade gracefully to Stale/Unavailable and complete
// _firstLoad rather than escape and fault the refresh BackgroundService.
_logger?.LogWarning(exception, "Galaxy hierarchy cache refresh failed.");
// Prior data (if any) is kept; LastSuccessAt is deliberately left untouched.
GalaxyHierarchyCacheEntry failed = previous with
{
Status = previous.HasData ? GalaxyCacheStatus.Stale : GalaxyCacheStatus.Unavailable,
LastQueriedAt = queriedAt,
LastError = exception.Message,
};
Volatile.Write(ref _current, failed);
_firstLoad.TrySetResult();
}
}
/// <summary>
/// Builds a fully materialized <see cref="GalaxyHierarchyCacheEntry"/> (object list,
/// lookup index and summary counts) from the raw hierarchy and attribute rowsets. Both the
/// live refresh path and the on-disk restore path go through here, so they yield the same
/// object list and index for the same rows.
/// </summary>
private static GalaxyHierarchyCacheEntry BuildEntry(
    GalaxyCacheStatus status,
    long sequence,
    DateTimeOffset? lastQueriedAt,
    DateTimeOffset? lastSuccessAt,
    DateTimeOffset? lastDeployTime,
    string? lastError,
    IReadOnlyList<GalaxyHierarchyRow> hierarchy,
    IReadOnlyList<GalaxyAttributeRow> attributes)
{
    IReadOnlyList<GalaxyObject> materialized = BuildObjects(hierarchy, attributes);
    GalaxyHierarchyIndex lookup = GalaxyHierarchyIndex.Build(materialized);

    // Summary counters, tallied directly from the raw rows.
    int areas = 0;
    foreach (GalaxyHierarchyRow row in hierarchy)
    {
        if (row.IsArea)
        {
            areas++;
        }
    }

    int historizedAttributes = 0;
    foreach (GalaxyAttributeRow attribute in attributes)
    {
        if (attribute.IsHistorized)
        {
            historizedAttributes++;
        }
    }

    int alarmAttributes = 0;
    foreach (GalaxyAttributeRow attribute in attributes)
    {
        if (attribute.IsAlarm)
        {
            alarmAttributes++;
        }
    }

    return new GalaxyHierarchyCacheEntry(
        Status: status,
        Sequence: sequence,
        LastQueriedAt: lastQueriedAt,
        LastSuccessAt: lastSuccessAt,
        LastDeployTime: lastDeployTime,
        LastError: lastError,
        Objects: materialized,
        Index: lookup,
        ObjectCount: hierarchy.Count,
        AreaCount: areas,
        AttributeCount: attributes.Count,
        HistorizedAttributeCount: historizedAttributes,
        AlarmAttributeCount: alarmAttributes);
}
/// <summary>
/// Cold-start seeding: loads the on-disk snapshot, if a store is configured and the cache
/// holds no data yet, and publishes it as a <see cref="GalaxyCacheStatus.Stale"/> entry —
/// it is last-known data, not live. A later refresh that sees the same deploy time
/// promotes it to healthy; one that sees a different deploy time replaces it.
/// Load failures other than cancellation are logged and ignored.
/// </summary>
private async Task TryRestoreFromDiskAsync(CancellationToken cancellationToken)
{
    // Nothing to restore without a store, and nothing to do once data is present.
    if (_snapshotStore is null || Volatile.Read(ref _current).HasData)
    {
        return;
    }

    GalaxyHierarchySnapshot? loaded = null;
    try
    {
        loaded = await _snapshotStore.TryLoadAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        throw;
    }
    catch (Exception exception)
    {
        // Leave `loaded` null so the check below abandons the restore.
        _logger?.LogWarning(exception, "Failed to restore the Galaxy hierarchy from the on-disk snapshot.");
    }

    if (loaded is null)
    {
        return;
    }

    long restoredSequence = Volatile.Read(ref _current).Sequence + 1;
    GalaxyHierarchyCacheEntry seeded = BuildEntry(
        status: GalaxyCacheStatus.Stale,
        sequence: restoredSequence,
        lastQueriedAt: loaded.SavedAt,
        lastSuccessAt: loaded.SavedAt,
        lastDeployTime: loaded.LastDeployTime,
        lastError: null,
        hierarchy: loaded.Hierarchy,
        attributes: loaded.Attributes);
    Volatile.Write(ref _current, seeded);

    // A restored snapshot counts as a completed first load: release callers blocked on
    // the bootstrap gate right away instead of making them wait for a live query that,
    // in the unreachable-database scenario this restore exists for, may take seconds.
    _firstLoad.TrySetResult();

    int objectCount = loaded.Hierarchy.Count;
    int attributeCount = loaded.Attributes.Count;
    GalaxyDeployEventInfo restoredEvent = new(
        Sequence: restoredSequence,
        ObservedAt: _timeProvider.GetUtcNow(),
        TimeOfLastDeploy: loaded.LastDeployTime,
        ObjectCount: objectCount,
        AttributeCount: attributeCount);
    _notifier.Publish(restoredEvent);

    _logger?.LogInformation(
        "Restored Galaxy hierarchy from on-disk snapshot saved {SavedAt:o}: {ObjectCount} objects, {AttributeCount} attributes (status Stale until the Galaxy database confirms).",
        loaded.SavedAt,
        objectCount,
        attributeCount);
}
/// <summary>
/// Writes the rows of a successful refresh to the snapshot store, when one is configured.
/// This is best-effort: a failed write is logged and swallowed, because a cache that
/// cannot write its backup is still fully usable.
/// </summary>
private async Task PersistSnapshotAsync(
    DateTimeOffset? deployTime,
    DateTimeOffset savedAt,
    IReadOnlyList<GalaxyHierarchyRow> hierarchy,
    IReadOnlyList<GalaxyAttributeRow> attributes,
    CancellationToken cancellationToken)
{
    if (_snapshotStore is not { } store)
    {
        return;
    }

    try
    {
        GalaxyHierarchySnapshot snapshot = new(deployTime, savedAt, hierarchy, attributes);
        await store.SaveAsync(snapshot, cancellationToken).ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Cancelled mid-write (service shutdown). That is not a persistence failure,
        // so it is not reported as a warning.
    }
    catch (Exception exception)
    {
        _logger?.LogWarning(exception, "Failed to persist the Galaxy hierarchy snapshot to disk.");
    }
}
/// <summary>
/// Maps every hierarchy row to a <see cref="GalaxyObject"/>, handing the mapper the
/// attribute rows grouped by owning gobject id. Output order follows the hierarchy rows.
/// </summary>
private static IReadOnlyList<GalaxyObject> BuildObjects(
    IReadOnlyList<GalaxyHierarchyRow> hierarchy,
    IReadOnlyList<GalaxyAttributeRow> attributes)
{
    // Group attribute rows by owning object, preserving input order within each group.
    Dictionary<int, List<GalaxyAttributeRow>> attributeRowsByOwner = new();
    foreach (GalaxyAttributeRow attributeRow in attributes)
    {
        if (!attributeRowsByOwner.TryGetValue(attributeRow.GobjectId, out List<GalaxyAttributeRow>? ownerRows))
        {
            ownerRows = new List<GalaxyAttributeRow>();
            attributeRowsByOwner.Add(attributeRow.GobjectId, ownerRows);
        }

        ownerRows.Add(attributeRow);
    }

    List<GalaxyObject> mapped = new(hierarchy.Count);
    for (int position = 0; position < hierarchy.Count; position++)
    {
        mapped.Add(GalaxyProtoMapper.MapObject(hierarchy[position], attributeRowsByOwner));
    }

    return mapped;
}
/// <summary>
/// Re-evaluates an entry's status against the current time. Unknown and Unavailable are
/// returned as-is; any other status becomes Stale once the last successful refresh is
/// older than <see cref="StaleThreshold"/>.
/// </summary>
private GalaxyCacheStatus ProjectStatus(GalaxyHierarchyCacheEntry snapshot)
{
    GalaxyCacheStatus recorded = snapshot.Status;

    bool neverAges = recorded is GalaxyCacheStatus.Unknown or GalaxyCacheStatus.Unavailable;
    if (neverAges || snapshot.LastSuccessAt is null)
    {
        return recorded;
    }

    TimeSpan age = _timeProvider.GetUtcNow() - snapshot.LastSuccessAt.Value;
    return age > StaleThreshold ? GalaxyCacheStatus.Stale : recorded;
}
}
@@ -0,0 +1,56 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Immutable snapshot of the Galaxy Repository browse data held by
/// <see cref="GalaxyHierarchyCache"/>. Multiple gRPC clients share the same
/// materialized object list and precomputed hierarchy index. Copies made with a
/// <c>with</c> expression (for example to change only <see cref="Status"/>) share the
/// same <see cref="Objects"/> and <see cref="Index"/> instances.
/// </summary>
/// <param name="Status">The cache freshness state at the time the entry was produced.</param>
/// <param name="Sequence">
/// Monotonically increasing per process start, beginning at 0 for <see cref="Empty"/>.
/// Bumped each time a new data set is published: on each heavy refresh and on a restore
/// from the on-disk snapshot. Refreshes that find no deploy change keep the same value.
/// </param>
/// <param name="LastQueriedAt">UTC wall-clock of the most recent refresh attempt.</param>
/// <param name="LastSuccessAt">UTC wall-clock of the most recent successful refresh.</param>
/// <param name="LastDeployTime">The <c>galaxy.time_of_last_deploy</c> the data was pulled at.</param>
/// <param name="LastError">The most recent refresh error message, or <see langword="null"/>.</param>
/// <param name="Objects">The materialized Galaxy object list.</param>
/// <param name="Index">Precomputed lookup structures over <paramref name="Objects"/>.</param>
/// <param name="ObjectCount">Number of objects in the hierarchy.</param>
/// <param name="AreaCount">Number of area objects in the hierarchy.</param>
/// <param name="AttributeCount">Number of attributes across all objects.</param>
/// <param name="HistorizedAttributeCount">Number of historized attributes.</param>
/// <param name="AlarmAttributeCount">Number of alarm-bearing attributes.</param>
public sealed record GalaxyHierarchyCacheEntry(
GalaxyCacheStatus Status,
long Sequence,
DateTimeOffset? LastQueriedAt,
DateTimeOffset? LastSuccessAt,
DateTimeOffset? LastDeployTime,
string? LastError,
IReadOnlyList<GalaxyObject> Objects,
GalaxyHierarchyIndex Index,
int ObjectCount,
int AreaCount,
int AttributeCount,
int HistorizedAttributeCount,
int AlarmAttributeCount)
{
/// <summary>
/// Gets the shared empty entry: status <see cref="GalaxyCacheStatus.Unknown"/>, sequence 0,
/// no timestamps, no objects, and the empty index.
/// </summary>
public static GalaxyHierarchyCacheEntry Empty { get; } = new(
Status: GalaxyCacheStatus.Unknown,
Sequence: 0,
LastQueriedAt: null,
LastSuccessAt: null,
LastDeployTime: null,
LastError: null,
Objects: Array.Empty<GalaxyObject>(),
Index: GalaxyHierarchyIndex.Empty,
ObjectCount: 0,
AreaCount: 0,
AttributeCount: 0,
HistorizedAttributeCount: 0,
AlarmAttributeCount: 0);
/// <summary>
/// Gets a value indicating whether the cache entry contains usable data. This is derived
/// from <see cref="Status"/> alone (Healthy or Stale), not from the object count.
/// </summary>
public bool HasData => Status is GalaxyCacheStatus.Healthy or GalaxyCacheStatus.Stale;
}
@@ -0,0 +1,206 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Precomputed lookup structures over a materialized Galaxy object list. Built once per
/// cache entry so browse/discover handlers can resolve roots/parents by id, tag name, or
/// contained path in O(1), enumerate direct children, and resolve tag addresses to objects
/// or attributes without rescanning the full object list.
/// </summary>
public sealed class GalaxyHierarchyIndex
{
/// <summary>
/// Stores the prebuilt lookup structures as-is. Private: instances come from
/// <see cref="Build"/> or <see cref="Empty"/>.
/// </summary>
private GalaxyHierarchyIndex(
IReadOnlyList<GalaxyObjectView> objectViews,
IReadOnlyDictionary<int, GalaxyObjectView> objectViewsById,
IReadOnlyDictionary<string, GalaxyTagLookup> tagsByAddress,
IReadOnlyDictionary<int, IReadOnlyList<GalaxyObjectView>> childrenByParent,
IReadOnlyDictionary<string, GalaxyObjectView> objectViewsByTagName,
IReadOnlyDictionary<string, GalaxyObjectView> objectViewsByContainedPath)
{
ObjectViews = objectViews;
ObjectViewsById = objectViewsById;
TagsByAddress = tagsByAddress;
ChildrenByParent = childrenByParent;
ObjectViewsByTagName = objectViewsByTagName;
ObjectViewsByContainedPath = objectViewsByContainedPath;
}
/// <summary>Gets an empty Galaxy hierarchy index. A single shared instance; <see cref="Build"/> returns it for an empty object list.</summary>
public static GalaxyHierarchyIndex Empty { get; } = new(
Array.Empty<GalaxyObjectView>(),
new Dictionary<int, GalaxyObjectView>(),
new Dictionary<string, GalaxyTagLookup>(StringComparer.OrdinalIgnoreCase),
new Dictionary<int, IReadOnlyList<GalaxyObjectView>>(),
new Dictionary<string, GalaxyObjectView>(StringComparer.OrdinalIgnoreCase),
new Dictionary<string, GalaxyObjectView>(StringComparer.OrdinalIgnoreCase));
/// <summary>Gets the object views, one per input object, in the order the objects were supplied to <see cref="Build"/>.</summary>
public IReadOnlyList<GalaxyObjectView> ObjectViews { get; }
/// <summary>Gets the object views indexed by gobject id.</summary>
public IReadOnlyDictionary<int, GalaxyObjectView> ObjectViewsById { get; }
/// <summary>Gets tags indexed by address (OrdinalIgnoreCase). Keys are object tag names (lookup has no attribute) and attribute full tag references (lookup carries the attribute); on a duplicate key the first one added wins.</summary>
public IReadOnlyDictionary<string, GalaxyTagLookup> TagsByAddress { get; }
/// <summary>Gets direct children grouped by parent gobject id. Root objects (no parent, or self-parented) live under key 0. Each list is sorted areas-first, then by display name (OrdinalIgnoreCase).</summary>
public IReadOnlyDictionary<int, IReadOnlyList<GalaxyObjectView>> ChildrenByParent { get; }
/// <summary>Gets object views indexed by <see cref="GalaxyObject.TagName"/> (OrdinalIgnoreCase). Lets browse/discover handlers resolve parents/roots by tag name in O(1) instead of scanning <see cref="ObjectViews"/>.</summary>
public IReadOnlyDictionary<string, GalaxyObjectView> ObjectViewsByTagName { get; }
/// <summary>Gets object views indexed by contained path (OrdinalIgnoreCase). Lets browse/discover handlers resolve parents/roots by path in O(1) instead of scanning <see cref="ObjectViews"/>.</summary>
public IReadOnlyDictionary<string, GalaxyObjectView> ObjectViewsByContainedPath { get; }
/// <summary>
/// Builds a Galaxy hierarchy index from the given objects: one view per object (with its
/// contained path and depth), id / tag-name / contained-path / tag-address lookups, and
/// sorted child lists per parent. All lookups are first-wins on duplicate keys.
/// </summary>
/// <param name="objects">The Galaxy objects to index.</param>
/// <returns>A new Galaxy hierarchy index, or <see cref="Empty"/> when <paramref name="objects"/> has no elements.</returns>
public static GalaxyHierarchyIndex Build(IReadOnlyList<GalaxyObject> objects)
{
if (objects.Count == 0)
{
return Empty;
}
// Pass 1: id -> object, needed up front so contained paths can walk parent chains.
Dictionary<int, GalaxyObject> objectsById = new();
foreach (GalaxyObject obj in objects)
{
// TryAdd: a duplicate gobject id keeps the first object seen.
objectsById.TryAdd(obj.GobjectId, obj);
}
List<GalaxyObjectView> views = new(objects.Count);
Dictionary<int, GalaxyObjectView> viewsById = new();
Dictionary<string, GalaxyTagLookup> tagsByAddress = new(StringComparer.OrdinalIgnoreCase);
Dictionary<string, GalaxyObjectView> viewsByTagName = new(StringComparer.OrdinalIgnoreCase);
Dictionary<string, GalaxyObjectView> viewsByContainedPath = new(StringComparer.OrdinalIgnoreCase);
// Pass 2: create a view per object and register it in every lookup.
foreach (GalaxyObject obj in objects)
{
string path = BuildContainedPath(obj, objectsById);
// Depth is the number of '/' separators in the path, so a single-segment path has depth 0.
int depth = string.IsNullOrWhiteSpace(path) ? 0 : path.Count(character => character == '/');
GalaxyObjectView view = new(obj, path, depth);
views.Add(view);
viewsById.TryAdd(obj.GobjectId, view);
if (!string.IsNullOrWhiteSpace(obj.TagName))
{
// Object-level address: the lookup carries no attribute.
tagsByAddress.TryAdd(obj.TagName, new GalaxyTagLookup(obj, Attribute: null, path));
viewsByTagName.TryAdd(obj.TagName, view);
}
if (!string.IsNullOrWhiteSpace(path))
{
viewsByContainedPath.TryAdd(path, view);
}
foreach (GalaxyAttribute attribute in obj.Attributes)
{
if (!string.IsNullOrWhiteSpace(attribute.FullTagReference))
{
// Attribute-level address; an address already registered (object or attribute) is kept.
tagsByAddress.TryAdd(attribute.FullTagReference, new GalaxyTagLookup(obj, attribute, path));
}
}
}
// Pass 3: bucket views under their effective parent id (0 = root).
Dictionary<int, List<GalaxyObjectView>> childrenByParent = new();
foreach (GalaxyObjectView view in views)
{
int parentKey = view.Object.ParentGobjectId;
// Treat self-parented (corrupt) rows as roots.
if (parentKey == view.Object.GobjectId)
{
parentKey = 0;
}
// Re-root orphans whose parent object is absent from the set (e.g. a deleted or
// never-loaded container area). Otherwise they bucket under a phantom parent id
// that is never reached from the root, so they vanish from browse entirely.
else if (parentKey != 0 && !objectsById.ContainsKey(parentKey))
{
parentKey = 0;
}
if (!childrenByParent.TryGetValue(parentKey, out List<GalaxyObjectView>? bucket))
{
bucket = [];
childrenByParent[parentKey] = bucket;
}
bucket.Add(view);
}
// Each child list is ordered areas first, then by display name (OrdinalIgnoreCase).
foreach (List<GalaxyObjectView> bucket in childrenByParent.Values)
{
bucket.Sort(CompareByAreaThenDisplayName);
}
// Re-key into a dictionary whose value type is the read-only list interface exposed by the index.
Dictionary<int, IReadOnlyList<GalaxyObjectView>> readOnlyChildren = new(childrenByParent.Count);
foreach (KeyValuePair<int, List<GalaxyObjectView>> kvp in childrenByParent)
{
readOnlyChildren[kvp.Key] = kvp.Value;
}
return new GalaxyHierarchyIndex(
views,
viewsById,
tagsByAddress,
readOnlyChildren,
viewsByTagName,
viewsByContainedPath);
}
/// <summary>
/// Builds the '/'-separated contained path of <paramref name="obj"/> by walking its parent
/// chain up to the root. The walk stops at a parent id of 0, at a parent that is not in
/// <paramref name="objectsById"/>, or when an id repeats (cycle guard). Blank segments
/// are left out of the path.
/// </summary>
private static string BuildContainedPath(
    GalaxyObject obj,
    IReadOnlyDictionary<int, GalaxyObject> objectsById)
{
    // Collected leaf-first, then reversed so the path reads root-first.
    List<string> segments = new();
    HashSet<int> visited = new();

    GalaxyObject? node = obj;
    while (node is not null && visited.Add(node.GobjectId))
    {
        string segment = ResolvePathSegment(node);
        if (!string.IsNullOrWhiteSpace(segment))
        {
            segments.Add(segment);
        }

        GalaxyObject? parent = null;
        if (node.ParentGobjectId != 0)
        {
            objectsById.TryGetValue(node.ParentGobjectId, out parent);
        }

        node = parent;
    }

    segments.Reverse();
    return string.Join('/', segments);
}
/// <summary>
/// Picks the name an object contributes to a contained path: the contained name if it is
/// not blank, otherwise the browse name if it is not blank, otherwise the tag name.
/// </summary>
private static string ResolvePathSegment(GalaxyObject obj) =>
    !string.IsNullOrWhiteSpace(obj.ContainedName) ? obj.ContainedName
    : !string.IsNullOrWhiteSpace(obj.BrowseName) ? obj.BrowseName
    : obj.TagName;
/// <summary>
/// Sort comparison for child lists: areas come before non-areas; within the same kind,
/// views are ordered by display name using an ordinal, case-insensitive comparison.
/// </summary>
private static int CompareByAreaThenDisplayName(GalaxyObjectView left, GalaxyObjectView right)
{
    bool leftIsArea = left.Object.IsArea;
    bool rightIsArea = right.Object.IsArea;

    if (leftIsArea == rightIsArea)
    {
        return string.Compare(DisplayNameOf(left), DisplayNameOf(right), StringComparison.OrdinalIgnoreCase);
    }

    return leftIsArea ? -1 : 1;
}
/// <summary>
/// Picks the name used for sorting a view: the browse name if it is not blank, otherwise
/// the contained name if it is not blank, otherwise the tag name.
/// </summary>
private static string DisplayNameOf(GalaxyObjectView view)
{
    GalaxyObject target = view.Object;
    return !string.IsNullOrWhiteSpace(target.BrowseName) ? target.BrowseName
        : !string.IsNullOrWhiteSpace(target.ContainedName) ? target.ContainedName
        : target.TagName;
}
}
@@ -0,0 +1,317 @@
using System.Collections.Concurrent;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Text;
using Grpc.Core;
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Projects a <c>DiscoverHierarchy</c> request against an immutable
/// <see cref="GalaxyHierarchyCacheEntry"/>: applies the root/depth/category/template/glob
/// filters, pages the result, and memoizes the filtered list per cache-entry instance so
/// paging is O(pageSize) rather than O(total) per page. Pure and side-effect free.
/// </summary>
public static class GalaxyHierarchyProjector
{
/// <summary>
/// Per-cache-entry memo of filtered, ordered <see cref="GalaxyObjectView"/> lists
/// keyed by filter signature. Without it, paging through a large hierarchy
/// re-applies every filter and re-scans the full <see cref="GalaxyHierarchyIndex.ObjectViews"/>
/// collection on every page — O(total) per page, O(total²/pageSize) end-to-end.
/// With it, the first page builds the filtered list and each subsequent page is an
/// O(pageSize) slice. The table is keyed on the immutable cache-entry instance, so
/// when the cache publishes a new entry the stale memo becomes unreachable and is
/// reclaimed with it — no explicit invalidation needed.
/// </summary>
/// <remarks>
/// NOTE(review): the key is the entry <em>instance</em>, not its contents.
/// <see cref="GalaxyHierarchyCache"/> creates a new instance via <c>with</c> on every
/// successful refresh tick (even when no deploy changed), and its <c>Current</c> getter
/// returns a fresh <c>with</c> copy whenever the projected status differs from the stored
/// one. Each such instance starts with an empty memo, so a memo only helps calls that
/// pass the very same entry instance. Confirm this lifetime is intended; the shared
/// <see cref="GalaxyHierarchyCacheEntry.Index"/> instance would be a longer-lived key.
/// </remarks>
private static readonly ConditionalWeakTable<GalaxyHierarchyCacheEntry, ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>>> FilteredViewCache = new();
/// <summary>
/// Projects a discovery request against a cache entry without paging: every matching
/// object is returned in a single result (offset 0, page size <see cref="int.MaxValue"/>).
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry.</param>
/// <param name="request">The discovery hierarchy request.</param>
/// <param name="browseSubtreeGlobs">Optional glob patterns to filter browse subtrees.</param>
public static GalaxyHierarchyQueryResult Project(
    GalaxyHierarchyCacheEntry entry,
    DiscoverHierarchyRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs = null) =>
    Project(entry, request, browseSubtreeGlobs, offset: 0, pageSize: int.MaxValue);
/// <summary>
/// Projects a discovery request against a cache entry and returns one page of the
/// matching objects, together with the total match count and the filter signature.
/// Returned objects are clones; their attribute lists are cleared when the request
/// opts out of attributes.
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry.</param>
/// <param name="request">The discovery hierarchy request.</param>
/// <param name="browseSubtreeGlobs">Optional glob patterns to filter browse subtrees.</param>
/// <param name="offset">The zero-based offset into the result set.</param>
/// <param name="pageSize">The maximum number of results to return.</param>
public static GalaxyHierarchyQueryResult Project(
    GalaxyHierarchyCacheEntry entry,
    DiscoverHierarchyRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int offset,
    int pageSize)
{
    ArgumentNullException.ThrowIfNull(entry);
    ArgumentNullException.ThrowIfNull(request);

    if (offset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(offset), offset, "Offset must be greater than or equal to zero.");
    }

    if (pageSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pageSize), pageSize, "Page size must be greater than zero.");
    }

    int? maxDepth = request.MaxDepth;
    if (maxDepth is < 0)
    {
        throw new RpcException(new Status(
            StatusCode.InvalidArgument,
            "DiscoverHierarchy max_depth must be greater than or equal to zero when provided."));
    }

    string signature = ComputeFilterSignature(request, browseSubtreeGlobs);
    IReadOnlyList<GalaxyObjectView> matches = GetFilteredViews(entry, request, browseSubtreeGlobs, maxDepth, signature);
    bool withAttributes = IncludeAttributes(request);

    // Clamp the page window to the match list; the long arithmetic keeps
    // offset + pageSize from overflowing when pageSize is int.MaxValue.
    int total = matches.Count;
    int stop = (int)Math.Min((long)offset + pageSize, total);
    List<GalaxyObject> page = new(Math.Max(0, stop - offset));
    for (int cursor = offset; cursor < stop; cursor++)
    {
        page.Add(CloneObject(matches[cursor].Object, withAttributes));
    }

    return new GalaxyHierarchyQueryResult(
        Objects: page,
        TotalObjectCount: total,
        FilterSignature: signature);
}
/// <summary>
/// Returns the ordered list of views that pass the root/depth, browse-subtree and request
/// filters. The list is memoized per cache-entry instance under
/// <paramref name="filterSignature"/>, so it is only built on the first request with that
/// signature for a given entry instance.
/// </summary>
private static IReadOnlyList<GalaxyObjectView> GetFilteredViews(
    GalaxyHierarchyCacheEntry entry,
    DiscoverHierarchyRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs,
    int? maxDepth,
    string filterSignature)
{
    // Resolve the root before touching the memo: ResolveRoot can throw
    // RpcException(NotFound), and a bad root must fail the same way whether or not a
    // memoized list already exists.
    GalaxyHierarchyIndex index = entry.Index;
    IReadOnlyList<GalaxyObjectView> candidates = index.ObjectViews;
    GalaxyObjectView? anchor = ResolveRoot(request, index);

    ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>> perEntryMemo = FilteredViewCache.GetValue(
        entry,
        static _ => new ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>>(StringComparer.Ordinal));

    return perEntryMemo.GetOrAdd(
        filterSignature,
        static (_, filter) =>
        {
            List<GalaxyObjectView> kept = new();
            foreach (GalaxyObjectView candidate in filter.Candidates)
            {
                if (!MatchesRoot(candidate, filter.Anchor, filter.DepthLimit))
                {
                    continue;
                }

                if (!MatchesBrowseSubtrees(candidate, filter.Globs))
                {
                    continue;
                }

                if (MatchesFilters(candidate.Object, filter.Criteria))
                {
                    kept.Add(candidate);
                }
            }

            return kept;
        },
        (Candidates: candidates, Anchor: anchor, DepthLimit: maxDepth, Globs: browseSubtreeGlobs, Criteria: request));
}
/// <summary>
/// Finds the object that owns a tag address. The address may be an object tag name or an
/// attribute's full tag reference; matching is ordinal and case-insensitive.
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry.</param>
/// <param name="tagAddress">The tag address to search for.</param>
/// <returns>The owning object, or <see langword="null"/> when the address is blank or unknown.</returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is <see langword="null"/>.</exception>
public static GalaxyObject? FindObjectForTag(
    GalaxyHierarchyCacheEntry entry,
    string tagAddress)
{
    // Guard the entry the same way Project does, instead of failing later with a
    // NullReferenceException on entry.Index.
    ArgumentNullException.ThrowIfNull(entry);

    if (string.IsNullOrWhiteSpace(tagAddress))
    {
        return null;
    }

    return entry.Index.TagsByAddress.TryGetValue(tagAddress, out GalaxyTagLookup? lookup)
        ? lookup.Object
        : null;
}
/// <summary>
/// Finds the attribute addressed by a tag address; matching is ordinal and case-insensitive.
/// </summary>
/// <param name="entry">The Galaxy hierarchy cache entry.</param>
/// <param name="tagAddress">The tag address to search for.</param>
/// <returns>
/// The attribute, or <see langword="null"/> when the address is blank, unknown, or resolves
/// to an object rather than an attribute.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is <see langword="null"/>.</exception>
public static GalaxyAttribute? FindAttributeForTag(
    GalaxyHierarchyCacheEntry entry,
    string tagAddress)
{
    // Guard the entry the same way Project does, instead of failing later with a
    // NullReferenceException on entry.Index.
    ArgumentNullException.ThrowIfNull(entry);

    if (string.IsNullOrWhiteSpace(tagAddress))
    {
        return null;
    }

    return entry.Index.TagsByAddress.TryGetValue(tagAddress, out GalaxyTagLookup? lookup)
        ? lookup.Attribute
        : null;
}
/// <summary>Gets the contained path for an object by its gobject ID.</summary>
/// <param name="entry">The Galaxy hierarchy cache entry.</param>
/// <param name="gobjectId">The Galaxy object ID.</param>
/// <returns>The object's contained path, or an empty string when the id is not in the index.</returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is <see langword="null"/>.</exception>
public static string GetContainedPath(
    GalaxyHierarchyCacheEntry entry,
    int gobjectId)
{
    // Guard the entry the same way Project does, instead of failing later with a
    // NullReferenceException on entry.Index.
    ArgumentNullException.ThrowIfNull(entry);

    return entry.Index.ObjectViewsById.TryGetValue(gobjectId, out GalaxyObjectView? view)
        ? view.ContainedPath
        : string.Empty;
}
/// <summary>
/// Resolves the request's optional root selector (gobject id, tag name, or contained path)
/// to a view in the index.
/// </summary>
/// <returns>The root view, or <see langword="null"/> when the request specifies no root.</returns>
/// <exception cref="RpcException">
/// <see cref="StatusCode.NotFound"/> when a root was specified but does not resolve.
/// </exception>
private static GalaxyObjectView? ResolveRoot(
    DiscoverHierarchyRequest request,
    GalaxyHierarchyIndex index)
{
    if (request.RootCase == DiscoverHierarchyRequest.RootOneofCase.None)
    {
        return null;
    }

    GalaxyObjectView? resolved = null;
    switch (request.RootCase)
    {
        case DiscoverHierarchyRequest.RootOneofCase.RootGobjectId:
            index.ObjectViewsById.TryGetValue(request.RootGobjectId, out resolved);
            break;
        case DiscoverHierarchyRequest.RootOneofCase.RootTagName:
            index.ObjectViewsByTagName.TryGetValue(request.RootTagName, out resolved);
            break;
        case DiscoverHierarchyRequest.RootOneofCase.RootContainedPath:
            index.ObjectViewsByContainedPath.TryGetValue(request.RootContainedPath, out resolved);
            break;
        default:
            // Unrecognized selector kind: leave the root unresolved so it reports NotFound.
            break;
    }

    return resolved
        ?? throw new RpcException(new Status(StatusCode.NotFound, "DiscoverHierarchy root was not found."));
}
/// <summary>
/// Tests whether a view lies inside the requested subtree: it must be the root itself or
/// have a contained path that starts with the root's path plus '/', and, when a maximum
/// depth is given, be at most that many levels below the root. With no root, every view
/// matches.
/// </summary>
private static bool MatchesRoot(
    GalaxyObjectView view,
    GalaxyObjectView? root,
    int? maxDepth)
{
    if (root is null)
    {
        return true;
    }

    if (view.Object.GobjectId != root.Object.GobjectId)
    {
        // Not the root itself, so it has to sit underneath the root's path.
        string subtreePrefix = root.ContainedPath + "/";
        if (!view.ContainedPath.StartsWith(subtreePrefix, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }
    }

    return maxDepth is not { } limit || view.Depth - root.Depth <= limit;
}
/// <summary>
/// Tests a view's contained path against the browse-subtree globs. A null or empty glob
/// list means no restriction; otherwise at least one glob must match.
/// </summary>
private static bool MatchesBrowseSubtrees(
    GalaxyObjectView view,
    IReadOnlyList<string>? browseSubtreeGlobs)
{
    if (browseSubtreeGlobs is null || browseSubtreeGlobs.Count == 0)
    {
        return true;
    }

    foreach (string pattern in browseSubtreeGlobs)
    {
        if (GalaxyGlobMatcher.IsMatch(view.ContainedPath, pattern))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Applies the request's object-level filters. All of the following must hold: the
/// category id is in the requested set (when one is given); every template filter is
/// contained (case-insensitively) in at least one template of the object's chain; the tag
/// name matches the tag-name glob (when one is given); and the object has an alarm /
/// historized attribute when the corresponding "only" flag is set.
/// </summary>
private static bool MatchesFilters(
    GalaxyObject obj,
    DiscoverHierarchyRequest request)
{
    bool categoryAccepted = request.CategoryIds.Count == 0 || request.CategoryIds.Contains(obj.CategoryId);
    if (!categoryAccepted)
    {
        return false;
    }

    bool templatesAccepted = request.TemplateChainContains.All(
        wanted => obj.TemplateChain.Any(template => template.Contains(wanted, StringComparison.OrdinalIgnoreCase)));
    if (!templatesAccepted)
    {
        return false;
    }

    bool tagNameAccepted = string.IsNullOrWhiteSpace(request.TagNameGlob)
        || GalaxyGlobMatcher.IsMatch(obj.TagName, request.TagNameGlob);
    if (!tagNameAccepted)
    {
        return false;
    }

    bool alarmAccepted = !request.AlarmBearingOnly || obj.Attributes.Any(attribute => attribute.IsAlarm);
    bool historizedAccepted = !request.HistorizedOnly || obj.Attributes.Any(attribute => attribute.IsHistorized);
    return alarmAccepted && historizedAccepted;
}
/// <summary>
/// Decides whether attributes are returned: the request's explicit choice when it set
/// one, otherwise <see langword="true"/>.
/// </summary>
private static bool IncludeAttributes(DiscoverHierarchyRequest request) =>
    request.HasIncludeAttributes ? request.IncludeAttributes : true;
/// <summary>
/// Returns a clone of a cached object so the shared instance is never handed out,
/// clearing the clone's attribute list when attributes were not requested.
/// </summary>
private static GalaxyObject CloneObject(GalaxyObject source, bool includeAttributes)
{
    GalaxyObject copy = source.Clone();
    if (includeAttributes)
    {
        return copy;
    }

    copy.Attributes.Clear();
    return copy;
}
/// <summary>
/// Computes a stable signature of a request's filter set, used as the memoization key and
/// to bind page tokens. The filters are written into a canonical string, with repeated
/// fields sorted so their order does not matter, and that string is hashed.
/// </summary>
/// <remarks>
/// Every number is formatted with the invariant culture so the signature does not depend
/// on the thread's current culture.
/// NOTE(review): values are joined without escaping, so a filter value that itself contains
/// ',' or '|' can produce the same canonical string as a different filter set — confirm
/// whether that matters for callers before changing the layout, since any layout change
/// alters every signature.
/// </remarks>
/// <param name="request">The discovery hierarchy request.</param>
/// <param name="browseSubtreeGlobs">Optional glob patterns to filter browse subtrees.</param>
/// <returns>The first 12 bytes of the SHA-256 hash of the canonical string, hex-encoded (24 characters).</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
public static string ComputeFilterSignature(
    DiscoverHierarchyRequest request,
    IReadOnlyList<string>? browseSubtreeGlobs)
{
    ArgumentNullException.ThrowIfNull(request);

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    StringBuilder builder = new();
    builder.Append("root=").Append(request.RootCase).Append('|');
    builder.Append(request.RootCase switch
    {
        DiscoverHierarchyRequest.RootOneofCase.RootGobjectId => request.RootGobjectId.ToString(invariant),
        DiscoverHierarchyRequest.RootOneofCase.RootTagName => request.RootTagName,
        DiscoverHierarchyRequest.RootOneofCase.RootContainedPath => request.RootContainedPath,
        _ => string.Empty,
    });
    builder.Append("|max=").Append(request.MaxDepth?.ToString(invariant) ?? "");

    // Sort numerically first, then format invariantly. AppendJoin on the raw ids would
    // format them with the current culture, unlike the root id and max depth above.
    builder.Append("|cat=").AppendJoin(',', request.CategoryIds.Order().Select(id => id.ToString(invariant)));
    builder.Append("|tpl=").AppendJoin(',', request.TemplateChainContains.Order(StringComparer.OrdinalIgnoreCase));
    builder.Append("|glob=").Append(request.TagNameGlob);
    builder.Append("|attrs=").Append(request.HasIncludeAttributes ? request.IncludeAttributes.ToString() : "unset");
    builder.Append("|alarm=").Append(request.AlarmBearingOnly);
    builder.Append("|hist=").Append(request.HistorizedOnly);
    builder.Append("|browse=").AppendJoin(',', (browseSubtreeGlobs ?? Array.Empty<string>()).Order(StringComparer.OrdinalIgnoreCase));

    // Only the first 12 hash bytes are kept, which keeps the signature short.
    byte[] hash = SHA256.HashData(Encoding.UTF8.GetBytes(builder.ToString()));
    return Convert.ToHexString(hash, 0, 12);
}
}
@@ -0,0 +1,16 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Result of one <see cref="GalaxyHierarchyProjector.Project(GalaxyHierarchyCacheEntry, DiscoverHierarchyRequest, System.Collections.Generic.IReadOnlyList{string}, int, int)"/>
/// call: a materialized page of matching objects, the total post-filter object count, and
/// the stable filter signature used to bind page tokens.
/// </summary>
/// <param name="Objects">
/// The page of matching objects. The projector fills this with clones of the cached
/// objects, with attribute lists cleared when the request opted out of attributes.
/// </param>
/// <param name="TotalObjectCount">Total matching objects across the whole hierarchy (post-filter), independent of the page window.</param>
/// <param name="FilterSignature">Stable signature of the filter set, used to bind page tokens.</param>
public sealed record GalaxyHierarchyQueryResult(
IReadOnlyList<GalaxyObject> Objects,
int TotalObjectCount,
string FilterSignature);
@@ -0,0 +1,62 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Hosted background service that keeps the Galaxy Repository hierarchy cache fresh off the
/// request path: one refresh at startup, then one per configured interval (at least one
/// second) until the host stops.
/// </summary>
public sealed class GalaxyHierarchyRefreshService(
    IGalaxyHierarchyCache cache,
    IOptions<GalaxyRepositoryOptions> options,
    ILogger<GalaxyHierarchyRefreshService> logger,
    TimeProvider? timeProvider = null) : BackgroundService
{
    private readonly TimeProvider _timeProvider = timeProvider ?? TimeProvider.System;

    /// <inheritdoc />
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        TimeSpan interval = TimeSpan.FromSeconds(Math.Max(1, options.Value.DashboardRefreshIntervalSeconds));

        if (!await TryRefreshAsync(isInitialLoad: true, stoppingToken).ConfigureAwait(false))
        {
            return;
        }

        using PeriodicTimer timer = new(interval, _timeProvider);
        try
        {
            while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
            {
                if (!await TryRefreshAsync(isInitialLoad: false, stoppingToken).ConfigureAwait(false))
                {
                    return;
                }
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Host shutdown while waiting for the next tick: exit quietly.
        }
    }

    /// <summary>
    /// Runs one cache refresh and contains its failures so they can never fault the
    /// service (and with it the host).
    /// </summary>
    /// <param name="isInitialLoad">Selects which warning is logged when the refresh fails.</param>
    /// <param name="stoppingToken">The host's stopping token.</param>
    /// <returns>
    /// <see langword="false"/> when the refresh was cancelled by host shutdown and the
    /// service should stop; <see langword="true"/> otherwise, including after a logged failure.
    /// </returns>
    private async Task<bool> TryRefreshAsync(bool isInitialLoad, CancellationToken stoppingToken)
    {
        try
        {
            await cache.RefreshAsync(stoppingToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            return false;
        }
        catch (Exception exception)
        {
            // A transient failure (e.g. a TimeoutException or Win32Exception from
            // connection establishment, or a DbException subtype the cache does not
            // catch) must not stop the host. The cache records its own
            // Unavailable/Stale status and the next tick retries.
            if (isInitialLoad)
            {
                logger.LogWarning(exception, "Initial Galaxy hierarchy cache load failed; will retry on the refresh interval.");
            }
            else
            {
                logger.LogWarning(exception, "Galaxy hierarchy cache refresh tick failed.");
            }
        }

        return true;
    }
}
@@ -0,0 +1,35 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// One row from <see cref="GalaxyRepository.GetHierarchyAsync"/>: a Galaxy
/// <c>gobject</c> instance with its hierarchy parent and template-derivation chain.
/// The hierarchy query returns deployed instances plus area objects (category 13), which
/// are included even when undeployed. The per-property notes below describe how
/// <see cref="GalaxyRepository.GetHierarchyAsync"/> populates each value.
/// </summary>
public sealed class GalaxyHierarchyRow
{
/// <summary>Gets the Galaxy object identifier (<c>gobject.gobject_id</c>).</summary>
public int GobjectId { get; init; }
/// <summary>Gets the tag name (<c>gobject.tag_name</c>).</summary>
public string TagName { get; init; } = string.Empty;
/// <summary>Gets the contained name (<c>gobject.contained_name</c>); empty when the column is NULL.</summary>
public string ContainedName { get; init; } = string.Empty;
/// <summary>Gets the browse name: the contained name when it is non-empty, otherwise the tag name.</summary>
public string BrowseName { get; init; } = string.Empty;
/// <summary>
/// Gets the parent Galaxy object identifier: <c>contained_by_gobject_id</c>, or
/// <c>area_gobject_id</c> when the object has no container (container id 0).
/// </summary>
public int ParentGobjectId { get; init; }
/// <summary>Gets a value indicating whether this is an area (template category 13).</summary>
public bool IsArea { get; init; }
/// <summary>Gets the category identifier (<c>template_definition.category_id</c>).</summary>
public int CategoryId { get; init; }
/// <summary>Gets the Galaxy object identifier of the host (<c>gobject.hosted_by_gobject_id</c>).</summary>
public int HostedByGobjectId { get; init; }
/// <summary>
/// Gets the template derivation chain as template tag names, ordered from the template the
/// object is directly derived from outward. Empty when the object has no template chain.
/// </summary>
public IReadOnlyList<string> TemplateChain { get; init; } = Array.Empty<string>();
}
@@ -0,0 +1,24 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// A serializable point-in-time copy of the Galaxy Repository browse data.
/// Holds the raw hierarchy and attribute rowsets — not the materialized
/// protobuf objects — so the restore path runs the exact same
/// materialization as a live refresh. Persisted by
/// <see cref="IGalaxyHierarchySnapshotStore"/> after a successful refresh
/// and reloaded at startup when the Galaxy database is unreachable.
/// </summary>
/// <remarks>
/// The JSON-file store (<see cref="GalaxyHierarchySnapshotStore"/>) wraps this record in a
/// schema-versioned envelope; a file with a mismatched version is ignored on load.
/// </remarks>
/// <param name="LastDeployTime">
/// The <c>galaxy.time_of_last_deploy</c> the rowsets were pulled at, or
/// <see langword="null"/> when the Galaxy table reported no deploy. A later
/// live refresh that observes this same timestamp can promote the restored
/// entry to healthy without re-running the heavy queries.
/// </param>
/// <param name="SavedAt">UTC wall-clock when the snapshot was written to disk.</param>
/// <param name="Hierarchy">The persisted object-hierarchy rowset.</param>
/// <param name="Attributes">The persisted attribute rowset.</param>
public sealed record GalaxyHierarchySnapshot(
DateTimeOffset? LastDeployTime,
DateTimeOffset SavedAt,
IReadOnlyList<GalaxyHierarchyRow> Hierarchy,
IReadOnlyList<GalaxyAttributeRow> Attributes);
@@ -0,0 +1,152 @@
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// JSON-file implementation of <see cref="IGalaxyHierarchySnapshotStore"/>.
/// Writes the on-disk snapshot atomically (temp file + rename) so a crash
/// mid-write can never leave a torn file, and ignores files whose schema
/// version it does not recognize. When
/// <see cref="GalaxyRepositoryOptions.PersistSnapshot"/> is <see langword="false"/>
/// — or <see cref="GalaxyRepositoryOptions.SnapshotCachePath"/> is empty —
/// both operations are no-ops. The snapshot path is fully consumer-supplied;
/// this store imposes no platform-specific default, so it is cross-platform.
/// </summary>
public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore, IDisposable
{
    /// <summary>
    /// On-disk format version. Bump this whenever the persisted shape changes
    /// in a way an older or newer consumer cannot read; a mismatched file is
    /// ignored rather than misparsed.
    /// </summary>
    private const int CurrentSchemaVersion = 1;

    /// <summary>Buffer size for the snapshot file streams (the FileStream default).</summary>
    private const int StreamBufferSize = 4096;

    private static readonly JsonSerializerOptions SerializerOptions = new()
    {
        WriteIndented = false,
    };

    private readonly string? _path;
    private readonly TimeSpan _writeTimeout;
    private readonly ILogger<GalaxyHierarchySnapshotStore>? _logger;

    // Serializes save/load so a load never observes a half-finished save from this process.
    private readonly SemaphoreSlim _ioGate = new(1, 1);

    /// <summary>Initializes a new instance of the <see cref="GalaxyHierarchySnapshotStore"/> class.</summary>
    /// <param name="options">Galaxy repository options carrying the snapshot path and enable flag.</param>
    /// <param name="logger">Optional logger for diagnostic output.</param>
    public GalaxyHierarchySnapshotStore(
        IOptions<GalaxyRepositoryOptions> options,
        ILogger<GalaxyHierarchySnapshotStore>? logger = null)
    {
        GalaxyRepositoryOptions value = options.Value;

        // A null path disables the store: persistence switched off or no path configured.
        _path = value.PersistSnapshot && !string.IsNullOrWhiteSpace(value.SnapshotCachePath)
            ? value.SnapshotCachePath
            : null;

        // The write bound reuses the SQL command timeout, clamped to at least one second.
        _writeTimeout = TimeSpan.FromSeconds(Math.Max(1, value.CommandTimeoutSeconds));
        _logger = logger;
    }

    /// <summary>
    /// Serializes <paramref name="snapshot"/> to a temporary file next to the configured
    /// path and then renames it over the target. No-op when the store is disabled.
    /// </summary>
    /// <param name="snapshot">The snapshot to persist.</param>
    /// <param name="cancellationToken">Token to cancel the operation.</param>
    /// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is <see langword="null"/>.</exception>
    /// <exception cref="OperationCanceledException">
    /// The caller's token was cancelled, or the write exceeded the configured timeout.
    /// </exception>
    public async Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(snapshot);
        if (_path is null)
        {
            return;
        }

        PersistedFile file = new(CurrentSchemaVersion, snapshot);
        await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            // Bound the write so a stuck disk — e.g. a SnapshotCachePath on an
            // unresponsive network share — cannot stall the caller. On the cache
            // refresh path that would otherwise pin the whole refresh loop.
            using CancellationTokenSource writeCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            writeCts.CancelAfter(_writeTimeout);

            string? directory = Path.GetDirectoryName(_path);
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }

            string tempPath = _path + ".tmp";

            // Only clean up a temp file this call actually created; if opening it fails
            // (for example because another writer holds it) the file is not ours to delete.
            bool tempCreated = false;
            try
            {
                // FileOptions.Asynchronous opens the handle for true async I/O so the
                // timeout token can be observed while a write is in flight.
                await using (FileStream stream = new(
                    tempPath,
                    FileMode.Create,
                    FileAccess.Write,
                    FileShare.None,
                    StreamBufferSize,
                    FileOptions.Asynchronous))
                {
                    tempCreated = true;
                    await JsonSerializer.SerializeAsync(stream, file, SerializerOptions, writeCts.Token).ConfigureAwait(false);
                }

                File.Move(tempPath, _path, overwrite: true);
            }
            catch when (tempCreated)
            {
                // The write, flush, or rename failed: do not leave a partial temp file
                // behind. The previous snapshot (if any) is still intact at _path.
                TryDeleteTempFile(tempPath);
                throw;
            }

            _logger?.LogDebug(
                "Persisted Galaxy hierarchy snapshot to {Path} ({ObjectCount} objects, {AttributeCount} attributes).",
                _path,
                snapshot.Hierarchy.Count,
                snapshot.Attributes.Count);
        }
        finally
        {
            _ioGate.Release();
        }
    }

    /// <summary>
    /// Loads the persisted snapshot, if one exists and is readable.
    /// </summary>
    /// <param name="cancellationToken">Token to cancel the operation.</param>
    /// <returns>
    /// The snapshot, or <see langword="null"/> when the store is disabled, the file is
    /// missing, unreadable, not valid JSON, empty, or carries an unrecognized schema version.
    /// </returns>
    public async Task<GalaxyHierarchySnapshot?> TryLoadAsync(CancellationToken cancellationToken)
    {
        if (_path is null || !File.Exists(_path))
        {
            return null;
        }

        await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            PersistedFile? file;
            await using (FileStream stream = new(
                _path,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                StreamBufferSize,
                FileOptions.Asynchronous))
            {
                file = await JsonSerializer.DeserializeAsync<PersistedFile>(
                    stream, SerializerOptions, cancellationToken).ConfigureAwait(false);
            }

            if (file is null || file.SchemaVersion != CurrentSchemaVersion || file.Snapshot is null)
            {
                _logger?.LogWarning(
                    "Ignoring Galaxy hierarchy snapshot at {Path}: unrecognized or empty schema version.",
                    _path);
                return null;
            }

            return file.Snapshot;
        }
        catch (Exception exception) when (exception is JsonException or IOException or UnauthorizedAccessException)
        {
            // A corrupt, truncated, locked, or access-denied snapshot file is an
            // expected failure mode for a disk cache — honor the Try contract and
            // return null rather than throwing. A file deleted between the Exists
            // check and the open surfaces here too (FileNotFoundException is an IOException).
            _logger?.LogWarning(
                exception,
                "Ignoring Galaxy hierarchy snapshot at {Path}: the file is unreadable or not valid JSON.",
                _path);
            return null;
        }
        finally
        {
            _ioGate.Release();
        }
    }

    /// <summary>
    /// Disposes the I/O gate. As a DI singleton the store is disposed once at host
    /// shutdown, by which point no save/load is in flight.
    /// </summary>
    public void Dispose()
    {
        _ioGate.Dispose();
    }

    /// <summary>Best-effort removal of a temp file left by a failed save; never throws for I/O or access errors.</summary>
    private void TryDeleteTempFile(string tempPath)
    {
        try
        {
            File.Delete(tempPath);
        }
        catch (Exception exception) when (exception is IOException or UnauthorizedAccessException)
        {
            // Cleanup must not mask the original save failure; the next save truncates
            // the temp file anyway (FileMode.Create).
            _logger?.LogDebug(
                exception,
                "Could not remove temporary Galaxy hierarchy snapshot file {Path}.",
                tempPath);
        }
    }

    /// <summary>On-disk envelope: a schema version plus the snapshot payload.</summary>
    private sealed record PersistedFile(int SchemaVersion, GalaxyHierarchySnapshot? Snapshot);
}
@@ -0,0 +1,16 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// A <see cref="GalaxyObject"/> paired with its computed contained path and hierarchy
/// depth. Materialized once per cache entry by <see cref="GalaxyHierarchyIndex"/> so
/// browse/discover projection can filter and page without recomputing paths.
/// </summary>
/// <param name="Object">The projected Galaxy object.</param>
/// <param name="ContainedPath">The slash-delimited contained path from the hierarchy root to this object.</param>
/// <param name="Depth">The number of path segments from the root (zero for top-level objects).</param>
public sealed record GalaxyObjectView(
GalaxyObject Object,
string ContainedPath,
int Depth);
@@ -0,0 +1,76 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Converts the <see cref="GalaxyHierarchyRow"/> and <see cref="GalaxyAttributeRow"/> rowsets
/// read by <see cref="GalaxyRepository"/> into <c>galaxy_repository.v1</c> proto messages.
/// Stateless and free of I/O, so it can be unit-tested without a SQL connection.
/// </summary>
public static class GalaxyProtoMapper
{
    /// <summary>
    /// Lazily maps hierarchy rows to Galaxy object protos, attaching each object's attributes.
    /// Nothing is grouped or mapped until the returned sequence is enumerated.
    /// </summary>
    /// <param name="hierarchy">Hierarchy rows from Galaxy Repository.</param>
    /// <param name="attributes">Attribute rows from Galaxy Repository.</param>
    /// <returns>One <see cref="GalaxyObject"/> per hierarchy row, in input order.</returns>
    public static IEnumerable<GalaxyObject> MapHierarchy(
        IReadOnlyList<GalaxyHierarchyRow> hierarchy,
        IReadOnlyList<GalaxyAttributeRow> attributes)
    {
        // Bucket the attributes by owning object once, so each row lookup is a dictionary hit.
        Dictionary<int, List<GalaxyAttributeRow>> byOwner =
            (from attribute in attributes
             group attribute by attribute.GobjectId)
            .ToDictionary(bucket => bucket.Key, bucket => bucket.ToList());

        foreach (GalaxyHierarchyRow hierarchyRow in hierarchy)
        {
            yield return MapObject(hierarchyRow, byOwner);
        }
    }

    /// <summary>Maps one hierarchy row, plus any attributes indexed under its id, to a Galaxy object proto.</summary>
    /// <param name="row">Hierarchy row from Galaxy Repository.</param>
    /// <param name="attributesByGobjectId">Attributes indexed by gobject ID.</param>
    /// <returns>The populated <see cref="GalaxyObject"/>; its attribute list is empty when the index has no entry for the row.</returns>
    public static GalaxyObject MapObject(
        GalaxyHierarchyRow row,
        IReadOnlyDictionary<int, List<GalaxyAttributeRow>> attributesByGobjectId)
    {
        GalaxyObject mapped = new();
        mapped.GobjectId = row.GobjectId;
        mapped.TagName = row.TagName;
        mapped.ContainedName = row.ContainedName;
        mapped.BrowseName = row.BrowseName;
        mapped.ParentGobjectId = row.ParentGobjectId;
        mapped.IsArea = row.IsArea;
        mapped.CategoryId = row.CategoryId;
        mapped.HostedByGobjectId = row.HostedByGobjectId;
        mapped.TemplateChain.AddRange(row.TemplateChain);

        if (!attributesByGobjectId.TryGetValue(row.GobjectId, out List<GalaxyAttributeRow>? owned))
        {
            return mapped;
        }

        for (int index = 0; index < owned.Count; index++)
        {
            mapped.Attributes.Add(MapAttribute(owned[index]));
        }

        return mapped;
    }

    /// <summary>Maps a Galaxy attribute row to a Galaxy attribute proto.</summary>
    /// <param name="row">Attribute row from Galaxy Repository.</param>
    /// <returns>
    /// The attribute proto. A missing data-type name becomes an empty string, and a missing
    /// array dimension becomes 0 with <c>ArrayDimensionPresent</c> set to <see langword="false"/>.
    /// </returns>
    public static GalaxyAttribute MapAttribute(GalaxyAttributeRow row)
    {
        GalaxyAttribute mapped = new();
        mapped.AttributeName = row.AttributeName;
        mapped.FullTagReference = row.FullTagReference;
        mapped.MxDataType = row.MxDataType;
        mapped.DataTypeName = row.DataTypeName ?? string.Empty;
        mapped.IsArray = row.IsArray;
        mapped.ArrayDimension = row.ArrayDimension ?? 0;
        mapped.ArrayDimensionPresent = row.ArrayDimension.HasValue;
        mapped.MxAttributeCategory = row.MxAttributeCategory;
        mapped.SecurityClassification = row.SecurityClassification;
        mapped.IsHistorized = row.IsHistorized;
        mapped.IsAlarm = row.IsAlarm;
        return mapped;
    }
}
@@ -0,0 +1,257 @@
using Microsoft.Data.SqlClient;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// SQL access to the AVEVA System Platform Galaxy Repository database.
/// <para>
/// <see cref="HierarchySql" /> is the query originally ported from the OtOpcUa
/// project. <see cref="AttributesSql" /> has diverged: it additionally enumerates the
/// built-in attributes contributed by each object's primitives (from
/// <c>attribute_definition</c> via <c>primitive_instance</c>), so engine/platform objects
/// and extension sub-attributes (e.g. <c>TestAlarm001.Acked</c>) are surfaced. The
/// OtOpcUa query is not kept in sync.
/// </para>
/// </summary>
public sealed class GalaxyRepository(GalaxyRepositoryOptions options) : IGalaxyRepository
{
    /// <summary>Probes the Galaxy Repository database by opening a connection and running <c>SELECT 1</c>.</summary>
    /// <param name="ct">Token to cancel the asynchronous operation.</param>
    /// <returns>
    /// <see langword="true"/> when the probe returns 1; <see langword="false"/> when it returns
    /// anything else or a <see cref="SqlException"/> or <see cref="InvalidOperationException"/>
    /// is thrown. Other exceptions propagate.
    /// </returns>
    public async Task<bool> TestConnectionAsync(CancellationToken ct = default)
    {
        try
        {
            using SqlConnection connection = new(options.ConnectionString);
            await connection.OpenAsync(ct).ConfigureAwait(false);

            using SqlCommand probe = new("SELECT 1", connection) { CommandTimeout = options.CommandTimeoutSeconds };
            object? scalar = await probe.ExecuteScalarAsync(ct).ConfigureAwait(false);
            return scalar is 1;
        }
        catch (Exception failure) when (failure is SqlException or InvalidOperationException)
        {
            return false;
        }
    }

    /// <summary>Reads <c>time_of_last_deploy</c> from the <c>galaxy</c> table.</summary>
    /// <param name="ct">Token to cancel the asynchronous operation.</param>
    /// <returns>The deploy time, or <see langword="null"/> when the query yields no <see cref="DateTime"/> value.</returns>
    public async Task<DateTime?> GetLastDeployTimeAsync(CancellationToken ct = default)
    {
        using SqlConnection connection = new(options.ConnectionString);
        await connection.OpenAsync(ct).ConfigureAwait(false);

        using SqlCommand query = new("SELECT time_of_last_deploy FROM galaxy", connection)
        {
            CommandTimeout = options.CommandTimeoutSeconds,
        };

        object? scalar = await query.ExecuteScalarAsync(ct).ConfigureAwait(false);
        if (scalar is DateTime deployedAt)
        {
            return deployedAt;
        }

        return null;
    }

    /// <summary>Runs the hierarchy query and materializes every returned Galaxy object row.</summary>
    /// <param name="ct">Token to cancel the asynchronous operation.</param>
    /// <returns>The hierarchy rows in the order the query returns them.</returns>
    public async Task<List<GalaxyHierarchyRow>> GetHierarchyAsync(CancellationToken ct = default)
    {
        List<GalaxyHierarchyRow> results = new();

        using SqlConnection connection = new(options.ConnectionString);
        await connection.OpenAsync(ct).ConfigureAwait(false);

        using SqlCommand query = new(HierarchySql, connection) { CommandTimeout = options.CommandTimeoutSeconds };
        using SqlDataReader cursor = await query.ExecuteReaderAsync(ct).ConfigureAwait(false);

        while (await cursor.ReadAsync(ct).ConfigureAwait(false))
        {
            results.Add(ReadHierarchyRow(cursor));
        }

        return results;
    }

    /// <summary>Runs the attribute query and materializes every returned attribute row.</summary>
    /// <param name="ct">Token to cancel the asynchronous operation.</param>
    /// <returns>The attribute rows in the order the query returns them.</returns>
    public async Task<List<GalaxyAttributeRow>> GetAttributesAsync(CancellationToken ct = default)
    {
        List<GalaxyAttributeRow> results = new();

        using SqlConnection connection = new(options.ConnectionString);
        await connection.OpenAsync(ct).ConfigureAwait(false);

        using SqlCommand query = new(AttributesSql, connection) { CommandTimeout = options.CommandTimeoutSeconds };
        using SqlDataReader cursor = await query.ExecuteReaderAsync(ct).ConfigureAwait(false);

        while (await cursor.ReadAsync(ct).ConfigureAwait(false))
        {
            results.Add(ReadAttributeRow(cursor));
        }

        return results;
    }

    /// <summary>Builds a hierarchy row from the reader's current record (column order of <see cref="HierarchySql"/>).</summary>
    private static GalaxyHierarchyRow ReadHierarchyRow(SqlDataReader cursor)
    {
        // Column 8 is the '|'-joined template chain; NULL is treated as an empty chain.
        string rawChain = cursor.IsDBNull(8) ? string.Empty : cursor.GetString(8);
        string[] chain = SplitTemplateChain(rawChain);

        GalaxyHierarchyRow row = new()
        {
            GobjectId = Convert.ToInt32(cursor.GetValue(0)),
            TagName = cursor.GetString(1),
            ContainedName = cursor.IsDBNull(2) ? string.Empty : cursor.GetString(2),
            BrowseName = cursor.GetString(3),
            ParentGobjectId = Convert.ToInt32(cursor.GetValue(4)),
            IsArea = Convert.ToInt32(cursor.GetValue(5)) == 1,
            CategoryId = Convert.ToInt32(cursor.GetValue(6)),
            HostedByGobjectId = Convert.ToInt32(cursor.GetValue(7)),
            TemplateChain = chain,
        };
        return row;
    }

    /// <summary>Splits a '|'-joined template chain into trimmed, non-empty template names.</summary>
    private static string[] SplitTemplateChain(string rawChain)
    {
        if (rawChain.Length == 0)
        {
            return Array.Empty<string>();
        }

        return rawChain.Split(['|'], StringSplitOptions.RemoveEmptyEntries)
            .Select(piece => piece.Trim())
            .Where(piece => piece.Length > 0)
            .ToArray();
    }

    /// <summary>Builds an attribute row from the reader's current record (column order of <see cref="AttributesSql"/>).</summary>
    private static GalaxyAttributeRow ReadAttributeRow(SqlDataReader cursor)
    {
        GalaxyAttributeRow row = new()
        {
            GobjectId = Convert.ToInt32(cursor.GetValue(0)),
            TagName = cursor.GetString(1),
            AttributeName = cursor.GetString(2),
            FullTagReference = cursor.GetString(3),
            MxDataType = Convert.ToInt32(cursor.GetValue(4)),
            DataTypeName = cursor.IsDBNull(5) ? null : cursor.GetString(5),
            IsArray = Convert.ToInt32(cursor.GetValue(6)) == 1,
            ArrayDimension = cursor.IsDBNull(7) ? null : Convert.ToInt32(cursor.GetValue(7)),
            MxAttributeCategory = Convert.ToInt32(cursor.GetValue(8)),
            SecurityClassification = Convert.ToInt32(cursor.GetValue(9)),
            IsHistorized = Convert.ToInt32(cursor.GetValue(10)) == 1,
            IsAlarm = Convert.ToInt32(cursor.GetValue(11)) == 1,
        };
        return row;
    }

    // Area objects (category 13) are returned even when undeployed (deployed_package_id = 0):
    // they are organizational/model nodes that group deployed objects, so excluding them
    // orphans every area whose containing area is not itself deployed. All non-area objects
    // still require deployment. Orphans left by a missing/deleted parent area are re-rooted
    // by GalaxyHierarchyIndex.Build so nothing disappears from browse.
    private const string HierarchySql = @"
;WITH template_chain AS (
SELECT g.gobject_id AS instance_gobject_id, t.gobject_id AS template_gobject_id,
t.tag_name AS template_tag_name, t.derived_from_gobject_id, 0 AS depth
FROM gobject g
INNER JOIN gobject t ON t.gobject_id = g.derived_from_gobject_id
WHERE g.is_template = 0 AND g.deployed_package_id <> 0 AND g.derived_from_gobject_id <> 0
UNION ALL
SELECT tc.instance_gobject_id, t.gobject_id, t.tag_name, t.derived_from_gobject_id, tc.depth + 1
FROM template_chain tc
INNER JOIN gobject t ON t.gobject_id = tc.derived_from_gobject_id
WHERE tc.derived_from_gobject_id <> 0 AND tc.depth < 10
)
SELECT DISTINCT
g.gobject_id,
g.tag_name,
g.contained_name,
CASE WHEN g.contained_name IS NULL OR g.contained_name = ''
THEN g.tag_name
ELSE g.contained_name
END AS browse_name,
CASE WHEN g.contained_by_gobject_id = 0
THEN g.area_gobject_id
ELSE g.contained_by_gobject_id
END AS parent_gobject_id,
CASE WHEN td.category_id = 13
THEN 1
ELSE 0
END AS is_area,
td.category_id AS category_id,
g.hosted_by_gobject_id AS hosted_by_gobject_id,
ISNULL(
STUFF((
SELECT '|' + tc.template_tag_name
FROM template_chain tc
WHERE tc.instance_gobject_id = g.gobject_id
ORDER BY tc.depth
FOR XML PATH('')
), 1, 1, ''),
''
) AS template_chain
FROM gobject g
INNER JOIN template_definition td
ON g.template_definition_id = td.template_definition_id
WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26)
AND g.is_template = 0
AND (g.deployed_package_id <> 0 OR td.category_id = 13)
ORDER BY parent_gobject_id, g.tag_name";

    // Unlike HierarchySql, this query has diverged from the OtOpcUa original. It returns two
    // kinds of attribute: user-configured dynamic attributes (the original `dynamic_attribute`
    // body, src_pri 0) and the built-in attributes every object inherits from its primitives
    // (`attribute_definition` joined through `primitive_instance`, src_pri 1). Built-in
    // attributes are why engine/platform objects and extension sub-attributes such as
    // `TestAlarm001.Acked` show up at all. Built-in rows carry no category filter (the
    // `attribute_definition` category numbering differs from `dynamic_attribute`'s — only the
    // `_`-prefix and `.Description` name exclusions apply) and are never flagged
    // `is_historized`/`is_alarm`: those flags describe a user attribute that anchors an
    // extension, not the extension's machinery leaves.
    private const string AttributesSql = @"
;WITH deployed_package_chain AS (
SELECT g.gobject_id, p.package_id, p.derived_from_package_id, 0 AS depth
FROM gobject g
INNER JOIN package p ON p.package_id = g.deployed_package_id
WHERE g.is_template = 0 AND g.deployed_package_id <> 0
UNION ALL
SELECT dpc.gobject_id, p.package_id, p.derived_from_package_id, dpc.depth + 1
FROM deployed_package_chain dpc
INNER JOIN package p ON p.package_id = dpc.derived_from_package_id
WHERE dpc.derived_from_package_id <> 0 AND dpc.depth < 10
),
candidate AS (
SELECT
dpc.gobject_id, g.tag_name, da.attribute_name, da.mx_data_type, da.is_array,
CASE WHEN da.is_array = 1
THEN CONVERT(int, CONVERT(varbinary(2),
SUBSTRING(da.mx_value, 15, 2) + SUBSTRING(da.mx_value, 13, 2), 2))
ELSE NULL END AS array_dimension,
da.mx_attribute_category, da.security_classification, dpc.depth, 0 AS src_pri
FROM deployed_package_chain dpc
INNER JOIN dynamic_attribute da ON da.package_id = dpc.package_id
INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id
INNER JOIN template_definition td ON td.template_definition_id = g.template_definition_id
WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26)
AND da.attribute_name NOT LIKE '[_]%'
AND da.attribute_name NOT LIKE '%.Description'
AND da.mx_attribute_category IN (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24)
UNION ALL
SELECT
dpc.gobject_id, g.tag_name,
CASE WHEN pi.primitive_name IS NULL OR pi.primitive_name = ''
THEN ad.attribute_name
ELSE pi.primitive_name + '.' + ad.attribute_name END AS attribute_name,
ad.mx_data_type, ad.is_array,
CASE WHEN ad.is_array = 1
THEN CONVERT(int, CONVERT(varbinary(2),
SUBSTRING(ad.mx_value, 15, 2) + SUBSTRING(ad.mx_value, 13, 2), 2))
ELSE NULL END AS array_dimension,
ad.mx_attribute_category, ad.security_classification, dpc.depth, 1 AS src_pri
FROM deployed_package_chain dpc
INNER JOIN primitive_instance pi ON pi.package_id = dpc.package_id
INNER JOIN attribute_definition ad ON ad.primitive_definition_id = pi.primitive_definition_id
INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id
INNER JOIN template_definition td ON td.template_definition_id = g.template_definition_id
WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26)
AND ad.attribute_name NOT LIKE '[_]%'
AND ad.attribute_name NOT LIKE '%.Description'
),
ranked AS (
SELECT c.*, ROW_NUMBER() OVER (
PARTITION BY c.gobject_id, c.attribute_name ORDER BY c.src_pri, c.depth) AS rn
FROM candidate c
)
SELECT
r.gobject_id, r.tag_name, r.attribute_name,
r.tag_name + '.' + r.attribute_name
+ CASE WHEN r.is_array = 1 THEN '[]' ELSE '' END AS full_tag_reference,
r.mx_data_type, dt.description AS data_type_name, r.is_array, r.array_dimension,
r.mx_attribute_category, r.security_classification,
CASE WHEN r.src_pri = 0 AND EXISTS (
SELECT 1 FROM deployed_package_chain dpc2
INNER JOIN primitive_instance pi ON pi.package_id = dpc2.package_id AND pi.primitive_name = r.attribute_name
INNER JOIN primitive_definition pd ON pd.primitive_definition_id = pi.primitive_definition_id AND pd.primitive_name = 'HistoryExtension'
WHERE dpc2.gobject_id = r.gobject_id
) THEN 1 ELSE 0 END AS is_historized,
CASE WHEN r.src_pri = 0 AND EXISTS (
SELECT 1 FROM deployed_package_chain dpc2
INNER JOIN primitive_instance pi ON pi.package_id = dpc2.package_id AND pi.primitive_name = r.attribute_name
INNER JOIN primitive_definition pd ON pd.primitive_definition_id = pi.primitive_definition_id AND pd.primitive_name = 'AlarmExtension'
WHERE dpc2.gobject_id = r.gobject_id
) THEN 1 ELSE 0 END AS is_alarm
FROM ranked r
LEFT JOIN data_type dt ON dt.mx_data_type = r.mx_data_type
WHERE r.rn = 1
ORDER BY r.tag_name, r.attribute_name";
}
@@ -0,0 +1,55 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Connection settings for the AVEVA System Platform Galaxy Repository database.
/// <para>
/// <see cref="SectionName"/> is a generic default; the DI extension accepts an explicit
/// configuration section path so a consumer can bind from its own section (e.g.
/// <c>HistorianGateway:Galaxy</c>).
/// </para>
/// </summary>
public sealed class GalaxyRepositoryOptions
{
/// <summary>
/// Generic default configuration section name. The DI extension accepts an explicit
/// section path, so a consumer may bind from a different section (e.g.
/// <c>HistorianGateway:Galaxy</c>).
/// </summary>
public const string SectionName = "GalaxyRepository";
/// <summary>
/// Default SQL Server connection string for the Galaxy Repository database.
/// Single source of truth shared with the integration-test fallback so the
/// production default and the live-test default cannot drift.
/// </summary>
public const string DefaultConnectionString =
"Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;";
/// <summary>The SQL Server connection string for the Galaxy Repository database.</summary>
public string ConnectionString { get; init; } = DefaultConnectionString;
/// <summary>
/// The timeout in seconds for SQL commands executed against the Galaxy Repository.
/// <see cref="GalaxyHierarchySnapshotStore"/> also reuses this value (clamped to at least
/// one second) as the upper bound for writing the snapshot file.
/// </summary>
public int CommandTimeoutSeconds { get; init; } = 60;
/// <summary>
/// Interval (seconds) between background refreshes of the Galaxy hierarchy cache, as
/// driven by <see cref="GalaxyHierarchyRefreshService"/>. Values below 1 are treated as 1.
/// The refresh runs off the request path, so the refresh rate is independent of
/// dashboard render rate.
/// </summary>
public int DashboardRefreshIntervalSeconds { get; init; } = 30;
/// <summary>
/// Whether the latest successful Galaxy browse dataset is persisted to disk. When
/// enabled, the cache reloads that snapshot at startup so clients can still browse
/// last-known data while the Galaxy database is unreachable.
/// </summary>
public bool PersistSnapshot { get; init; } = true;
/// <summary>
/// File path for the persisted Galaxy browse snapshot. Ignored when
/// <see cref="PersistSnapshot"/> is <see langword="false"/>. There is no built-in
/// default path — the consumer supplies a cross-platform-friendly path appropriate to
/// its host. When left empty or whitespace, <see cref="GalaxyHierarchySnapshotStore"/>
/// treats persistence as disabled (save and load are no-ops) even if
/// <see cref="PersistSnapshot"/> is enabled.
/// </summary>
public string SnapshotCachePath { get; init; } = string.Empty;
}
@@ -0,0 +1,16 @@
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Resolution result for a tag address: the owning <see cref="GalaxyObject"/>, the
/// specific <see cref="GalaxyAttribute"/> when the address names an attribute (otherwise
/// <see langword="null"/>), and the object's contained path.
/// </summary>
/// <param name="Object">The Galaxy object that owns the looked-up address.</param>
/// <param name="Attribute">The matched attribute, or <see langword="null"/> when the address names an object rather than one of its attributes.</param>
/// <param name="ContainedPath">The owning object's contained path.</param>
public sealed record GalaxyTagLookup(
GalaxyObject Object,
GalaxyAttribute? Attribute,
string ContainedPath);
@@ -0,0 +1,329 @@
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
using ProtoGalaxyRepository = ZB.MOM.WW.GalaxyRepository.Grpc.GalaxyRepository;
namespace ZB.MOM.WW.GalaxyRepository.Grpc;
/// <summary>
/// Reusable gRPC surface that exposes the Galaxy Repository to clients. Hosted by any
/// consuming gateway (e.g. MxAccessGateway or the HistorianGateway sidecar) via
/// <see cref="DependencyInjection.GalaxyRepositoryServiceCollectionExtensions.MapZbGalaxyRepository"/>.
/// <para>
/// <c>DiscoverHierarchy</c> and <c>GetLastDeployTime</c> serve from
/// <see cref="IGalaxyHierarchyCache"/> so many clients share a single SQL pull.
/// <c>WatchDeployEvents</c> streams events from <see cref="IGalaxyDeployNotifier"/>.
/// <c>TestConnection</c> remains a direct SQL probe since callers use it as a health check.
/// </para>
/// <para>
/// This service applies <b>no</b> per-identity browse-subtree filtering — the full
/// hierarchy is projected (<c>null</c> subtree globs). Authorization (including any
/// subtree scoping) is the responsibility of the hosting gateway's interceptor layer.
/// </para>
/// </summary>
/// <param name="repository">Direct SQL surface used by <c>TestConnection</c>.</param>
/// <param name="cache">Shared hierarchy cache that <c>DiscoverHierarchy</c>/<c>BrowseChildren</c>/<c>GetLastDeployTime</c> serve from.</param>
/// <param name="notifier">Deploy-event source streamed by <c>WatchDeployEvents</c>.</param>
public sealed class GalaxyRepositoryGrpcService(
IGalaxyRepository repository,
IGalaxyHierarchyCache cache,
IGalaxyDeployNotifier notifier) : ProtoGalaxyRepository.GalaxyRepositoryBase
{
private static readonly TimeSpan FirstLoadWaitBudget = TimeSpan.FromSeconds(5);
private const int DefaultDiscoverPageSize = 1000;
private const int MaxDiscoverPageSize = 5000;
private const int DefaultBrowsePageSize = 500;
// MaxBrowsePageSize reuses MaxDiscoverPageSize (5000) — same cap.
/// <inheritdoc />
public override async Task<TestConnectionReply> TestConnection(
    TestConnectionRequest request,
    ServerCallContext context)
{
    // Goes straight to the repository rather than the cache: callers use this as a
    // live health probe of the SQL connection.
    CancellationToken callAborted = context.CancellationToken;
    bool reachable = await repository.TestConnectionAsync(callAborted).ConfigureAwait(false);

    TestConnectionReply reply = new();
    reply.Ok = reachable;
    return reply;
}
/// <inheritdoc />
public override async Task<GetLastDeployTimeReply> GetLastDeployTime(
    GetLastDeployTimeRequest request,
    ServerCallContext context)
{
    await WaitForCacheBootstrap(context.CancellationToken).ConfigureAwait(false);

    // Served from the shared cache entry, not from SQL.
    GalaxyHierarchyCacheEntry snapshot = cache.Current;
    if (!snapshot.HasData)
    {
        Status unavailable = new(StatusCode.Unavailable, ResolveUnavailableMessage(snapshot));
        throw new RpcException(unavailable);
    }

    // Present=false (with no timestamp) tells the client the Galaxy reported no deploy.
    GetLastDeployTimeReply reply = new();
    reply.Present = snapshot.LastDeployTime.HasValue;
    if (reply.Present)
    {
        reply.TimeOfLastDeploy = Timestamp.FromDateTimeOffset(snapshot.LastDeployTime!.Value);
    }

    return reply;
}
/// <inheritdoc />
public override async Task<DiscoverHierarchyReply> DiscoverHierarchy(
    DiscoverHierarchyRequest request,
    ServerCallContext context)
{
    await WaitForCacheBootstrap(context.CancellationToken).ConfigureAwait(false);

    GalaxyHierarchyCacheEntry snapshot = cache.Current;
    if (!snapshot.HasData)
    {
        Status unavailable = new(StatusCode.Unavailable, ResolveUnavailableMessage(snapshot));
        throw new RpcException(unavailable);
    }

    int limit = ResolvePageSize(request.PageSize);

    // No per-identity subtree scoping happens in this shared library (null globs);
    // the hosting gateway enforces authorization at its interceptor layer.
    string signature = GalaxyHierarchyProjector.ComputeFilterSignature(request, browseSubtreeGlobs: null);

    // The token is parsed against the current cache sequence and filter signature.
    PageToken cursor = ParsePageToken(request.PageToken, snapshot.Sequence, signature);
    int start = cursor.Offset;

    GalaxyHierarchyQueryResult page = GalaxyHierarchyProjector.Project(
        snapshot,
        request,
        browseSubtreeGlobs: null,
        start,
        limit);

    // The total is only known after projection, so the range check happens here.
    if (start > page.TotalObjectCount)
    {
        Status outOfRange = new(
            StatusCode.InvalidArgument,
            "DiscoverHierarchy page_token is outside the current hierarchy.");
        throw new RpcException(outOfRange);
    }

    DiscoverHierarchyReply reply = new();
    reply.TotalObjectCount = page.TotalObjectCount;
    reply.Objects.Add(page.Objects);

    // Only hand out a continuation token while objects remain beyond this page.
    int consumed = start + page.Objects.Count;
    bool hasMore = consumed < page.TotalObjectCount;
    if (hasMore)
    {
        reply.NextPageToken = FormatPageToken(snapshot.Sequence, page.FilterSignature, consumed);
    }

    return reply;
}
/// <inheritdoc />
public override async Task<BrowseChildrenReply> BrowseChildren(
    BrowseChildrenRequest request,
    ServerCallContext context)
{
    await WaitForCacheBootstrap(context.CancellationToken).ConfigureAwait(false);

    GalaxyHierarchyCacheEntry snapshot = cache.Current;
    if (!snapshot.HasData)
    {
        Status unavailable = new(StatusCode.Unavailable, ResolveUnavailableMessage(snapshot));
        throw new RpcException(unavailable);
    }

    int limit = ResolveBrowsePageSize(request.PageSize);

    // Resolve the parent id up front so it becomes part of the page-token signature.
    // The projector resolves it again internally; with the by-name/by-path indexes on
    // GalaxyHierarchyIndex that second lookup is O(1), so the duplication is cheap and
    // keeps the projector self-contained.
    int resolvedParent = GalaxyBrowseProjector.ResolveParentId(snapshot, request);
    string signature = GalaxyBrowseProjector.ComputeFilterSignature(
        request, browseSubtreeGlobs: null, resolvedParent);

    PageToken cursor = ParsePageToken(request.PageToken, snapshot.Sequence, signature);
    int start = cursor.Offset;

    GalaxyBrowseChildrenResult page = GalaxyBrowseProjector.ProjectChildren(
        snapshot,
        request,
        browseSubtreeGlobs: null,
        start,
        limit);

    // The child total is only known after projection, so the range check happens here.
    if (start > page.TotalChildCount)
    {
        Status outOfRange = new(
            StatusCode.InvalidArgument,
            "BrowseChildren page_token is outside the current children set.");
        throw new RpcException(outOfRange);
    }

    BrowseChildrenReply reply = new();
    reply.TotalChildCount = page.TotalChildCount;
    reply.CacheSequence = (ulong)snapshot.Sequence;
    reply.Children.Add(page.Children);
    reply.ChildHasChildren.Add(page.ChildHasChildren);

    // Only hand out a continuation token while children remain beyond this page.
    int consumed = start + page.Children.Count;
    bool hasMore = consumed < page.TotalChildCount;
    if (hasMore)
    {
        reply.NextPageToken = FormatPageToken(snapshot.Sequence, page.FilterSignature, consumed);
    }

    return reply;
}
/// <inheritdoc />
public override async Task WatchDeployEvents(
    WatchDeployEventsRequest request,
    IServerStreamWriter<DeployEvent> responseStream,
    ServerCallContext context)
{
    // Deploy time the client says it already has. It is consulted for the first
    // event of the subscription only and cleared afterwards, so every later event
    // is forwarded unconditionally.
    DateTimeOffset? suppressIfEqual = request.LastSeenDeployTime?.ToDateTimeOffset();

    await foreach (GalaxyDeployEventInfo deployInfo in notifier
        .SubscribeAsync(context.CancellationToken)
        .ConfigureAwait(false))
    {
        bool alreadyKnown = suppressIfEqual.HasValue
            && deployInfo.TimeOfLastDeploy == suppressIfEqual.Value;
        suppressIfEqual = null;

        if (alreadyKnown)
        {
            continue;
        }

        await responseStream
            .WriteAsync(MapDeployEvent(deployInfo), context.CancellationToken)
            .ConfigureAwait(false);
    }
}
/// <summary>
/// Waits, for at most <c>FirstLoadWaitBudget</c>, for the cache's first load to
/// finish. Returns immediately when the cache already has data or already reports
/// <see cref="GalaxyCacheStatus.Unavailable"/>.
/// </summary>
/// <param name="cancellationToken">The caller's token; its cancellation propagates as <see cref="OperationCanceledException"/>.</param>
private async Task WaitForCacheBootstrap(CancellationToken cancellationToken)
{
    bool nothingToWaitFor = cache.Current.HasData
        || cache.Current.Status == GalaxyCacheStatus.Unavailable;
    if (nothingToWaitFor)
    {
        return;
    }

    using CancellationTokenSource budget = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
    budget.CancelAfter(FirstLoadWaitBudget);

    try
    {
        await cache.WaitForFirstLoadAsync(budget.Token).ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
    {
        // Only the wait budget elapsed (the caller did not cancel): swallow and let
        // the caller inspect the current (possibly Unknown/Unavailable) entry.
        // A caller-initiated cancellation fails the filter and propagates unchanged.
    }
}
/// <summary>Converts an internal deploy-event record into its gRPC wire message.</summary>
/// <param name="info">The deploy event to convert.</param>
/// <returns>
/// A <see cref="DeployEvent"/> whose <c>TimeOfLastDeploy</c> is populated only when
/// <paramref name="info"/> carries a deploy time; <c>TimeOfLastDeployPresent</c>
/// records which case applies.
/// </returns>
private static DeployEvent MapDeployEvent(GalaxyDeployEventInfo info)
{
    DeployEvent message = new();
    message.Sequence = (ulong)info.Sequence;
    message.ObservedAt = Timestamp.FromDateTimeOffset(info.ObservedAt);
    message.ObjectCount = info.ObjectCount;
    message.AttributeCount = info.AttributeCount;
    message.TimeOfLastDeployPresent = info.TimeOfLastDeploy.HasValue;

    if (info.TimeOfLastDeploy is { } deployedAt)
    {
        message.TimeOfLastDeploy = Timestamp.FromDateTimeOffset(deployedAt);
    }

    return message;
}
/// <summary>Chooses the Unavailable status detail that matches the cache entry's status.</summary>
/// <param name="entry">The cache entry that has no data to serve.</param>
/// <returns>A human-readable explanation for the gRPC status detail.</returns>
private static string ResolveUnavailableMessage(GalaxyHierarchyCacheEntry entry)
{
    switch (entry.Status)
    {
        case GalaxyCacheStatus.Unknown:
            return "Galaxy cache has not completed its initial load yet.";
        case GalaxyCacheStatus.Unavailable:
            return "Galaxy repository is unavailable.";
        default:
            return "Galaxy cache has no data available.";
    }
}
/// <summary>
/// Turns the DiscoverHierarchy <c>page_size</c> field into the effective page size:
/// zero selects <c>DefaultDiscoverPageSize</c>, and the result never exceeds
/// <c>MaxDiscoverPageSize</c>.
/// </summary>
/// <param name="requestedPageSize">The raw <c>page_size</c> from the request.</param>
/// <returns>The page size to use for this call.</returns>
/// <exception cref="RpcException">InvalidArgument when <paramref name="requestedPageSize"/> is negative.</exception>
private static int ResolvePageSize(int requestedPageSize)
{
    if (requestedPageSize >= 0)
    {
        int effective = requestedPageSize > 0 ? requestedPageSize : DefaultDiscoverPageSize;
        return effective < MaxDiscoverPageSize ? effective : MaxDiscoverPageSize;
    }

    Status rejected = new(
        StatusCode.InvalidArgument,
        "DiscoverHierarchy page_size must be greater than zero when provided.");
    throw new RpcException(rejected);
}
/// <summary>
/// Turns the BrowseChildren <c>page_size</c> field into the effective page size:
/// zero selects <c>DefaultBrowsePageSize</c>, and the result never exceeds
/// <c>MaxDiscoverPageSize</c>.
/// </summary>
/// <param name="requested">The raw <c>page_size</c> from the request.</param>
/// <returns>The page size to use for this call.</returns>
/// <exception cref="RpcException">InvalidArgument when <paramref name="requested"/> is negative.</exception>
private static int ResolveBrowsePageSize(int requested)
{
    if (requested >= 0)
    {
        int effective = requested > 0 ? requested : DefaultBrowsePageSize;

        // NOTE(review): the cap is MaxDiscoverPageSize, shared with DiscoverHierarchy.
        // Confirm that a browse-specific maximum is not intended here.
        return effective < MaxDiscoverPageSize ? effective : MaxDiscoverPageSize;
    }

    Status rejected = new(
        StatusCode.InvalidArgument,
        "BrowseChildren page_size must be greater than zero when provided.");
    throw new RpcException(rejected);
}
/// <summary>
/// Builds a page token of the form <c>sequence:filterSignature:offset</c>, with both
/// numbers rendered culture-invariantly.
/// </summary>
/// <param name="sequence">Cache sequence the page was projected from.</param>
/// <param name="filterSignature">Signature of the filters the page was projected with.</param>
/// <param name="offset">Offset at which the next page starts.</param>
/// <returns>The token text.</returns>
private static string FormatPageToken(long sequence, string filterSignature, int offset)
{
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string sequenceText = sequence.ToString(invariant);
    string offsetText = offset.ToString(invariant);
    return string.Join(":", sequenceText, filterSignature, offsetText);
}
/// <summary>
/// Decodes and validates a page token of the form <c>sequence:filterSignature:offset</c>
/// (the shape produced by <c>FormatPageToken</c>).
/// </summary>
/// <remarks>
/// The sequence is taken from before the first <c>':'</c> and the offset from after the
/// last one; everything in between is the filter signature. Anchoring on the outermost
/// separators keeps the token round-trippable even when the signature itself contains
/// <c>':'</c>.
/// </remarks>
/// <param name="pageToken">The client-supplied token; null, empty, or whitespace means "first page".</param>
/// <param name="currentSequence">Sequence of the cache entry serving this call.</param>
/// <param name="currentFilterSignature">Signature of the filters on this call.</param>
/// <returns>The decoded token, or an offset-zero token when <paramref name="pageToken"/> is blank.</returns>
/// <exception cref="RpcException">
/// InvalidArgument when the token is malformed, was issued for a different cache
/// sequence, or was issued for a different filter signature.
/// </exception>
private static PageToken ParsePageToken(string pageToken, long currentSequence, string currentFilterSignature)
{
    if (string.IsNullOrWhiteSpace(pageToken))
    {
        return new PageToken(currentSequence, currentFilterSignature, Offset: 0);
    }

    int firstSeparator = pageToken.IndexOf(':');
    int lastSeparator = pageToken.LastIndexOf(':');

    // NumberStyles.None accepts decimal digits only (no sign, no whitespace), so a
    // successfully parsed offset can never be negative and needs no separate check.
    if (firstSeparator < 0
        || lastSeparator == firstSeparator
        || !long.TryParse(
            pageToken[..firstSeparator],
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out long sequence)
        || !int.TryParse(
            pageToken[(lastSeparator + 1)..],
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out int offset))
    {
        throw new RpcException(new Status(
            StatusCode.InvalidArgument,
            "page_token is invalid."));
    }

    if (sequence != currentSequence)
    {
        throw new RpcException(new Status(
            StatusCode.InvalidArgument,
            "page_token is stale."));
    }

    string filterSignature = pageToken[(firstSeparator + 1)..lastSeparator];
    if (!string.Equals(filterSignature, currentFilterSignature, StringComparison.Ordinal))
    {
        throw new RpcException(new Status(
            StatusCode.InvalidArgument,
            "page_token does not match the current filters."));
    }

    return new PageToken(sequence, filterSignature, offset);
}
/// <summary>
/// Decoded form of a page token: the cache sequence it was issued for, the filter
/// signature it was issued with, and the offset at which the requested page starts.
/// </summary>
private sealed record PageToken(long Sequence, string FilterSignature, int Offset);
}
@@ -0,0 +1,17 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Publishes Galaxy repository deploy events to subscribers and exposes the most
/// recently published event.
/// </summary>
public interface IGalaxyDeployNotifier
{
/// <summary>The most recently published event, or <see langword="null"/> if no event has fired yet.</summary>
GalaxyDeployEventInfo? Latest { get; }
/// <summary>Publishes a deploy event to all current subscribers and stores it as <see cref="Latest"/>.</summary>
/// <param name="info">The deploy event to publish.</param>
void Publish(GalaxyDeployEventInfo info);
/// <summary>
/// Subscribes to deploy events. The sequence yields the latest event first (if one
/// is available) and then streams new events as they fire.
/// </summary>
/// <param name="cancellationToken">Token that ends the subscription.</param>
/// <returns>Async enumerable of deploy events.</returns>
IAsyncEnumerable<GalaxyDeployEventInfo> SubscribeAsync(CancellationToken cancellationToken);
}
@@ -0,0 +1,25 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>Cache for Galaxy Repository hierarchy data.</summary>
public interface IGalaxyHierarchyCache
{
/// <summary>The latest cache entry. Status freshness is recomputed against the clock.</summary>
GalaxyHierarchyCacheEntry Current { get; }
/// <summary>
/// Forces a refresh against the Galaxy Repository. Performs a cheap
/// <c>time_of_last_deploy</c> probe first and only re-queries the heavy hierarchy +
/// attributes rowsets when the deploy time has changed since the last successful
/// refresh.
/// </summary>
/// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
/// <returns>A task that completes when the refresh attempt has finished.</returns>
Task RefreshAsync(CancellationToken cancellationToken);
/// <summary>
/// Awaits the first completed refresh attempt (success or failure). Useful for
/// gRPC handlers that want to serve from cache without returning Unavailable on the
/// very first request after the service starts.
/// </summary>
/// <param name="cancellationToken">Token to cancel the wait.</param>
/// <returns>A task that completes once the first refresh attempt has finished.</returns>
Task WaitForFirstLoadAsync(CancellationToken cancellationToken);
}
@@ -0,0 +1,28 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Persists the latest Galaxy Repository browse dataset to disk and reloads
/// it at startup. Lets <see cref="GalaxyHierarchyCache"/> serve last-known
/// browse data when the Galaxy database is unreachable on a cold start.
/// </summary>
public interface IGalaxyHierarchySnapshotStore
{
/// <summary>
/// Writes <paramref name="snapshot"/> to disk, replacing any previous
/// snapshot atomically. A no-op when snapshot persistence is disabled.
/// </summary>
/// <param name="snapshot">The browse dataset to persist.</param>
/// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
/// <returns>A task that completes when the save attempt has finished.</returns>
Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken);
/// <summary>
/// Reads the persisted Galaxy browse dataset.
/// </summary>
/// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
/// <returns>
/// The persisted snapshot, or <see langword="null"/> when none exists,
/// persistence is disabled, or the on-disk file uses an unrecognized
/// schema version.
/// </returns>
Task<GalaxyHierarchySnapshot?> TryLoadAsync(CancellationToken cancellationToken);
}
@@ -0,0 +1,26 @@
namespace ZB.MOM.WW.GalaxyRepository;
/// <summary>
/// Abstraction over <see cref="GalaxyRepository"/>: the read-only SQL surface over the
/// AVEVA System Platform Galaxy Repository database. Exists so consumers (and the cache
/// layer) can be unit-tested against an in-memory fake without standing up a
/// real <c>Microsoft.Data.SqlClient</c> <c>SqlConnection</c> against a bogus host/port.
/// </summary>
public interface IGalaxyRepository
{
/// <summary>Tests the connection to the Galaxy Repository database.</summary>
/// <param name="ct">Token to cancel the asynchronous operation.</param>
/// <returns>A task producing the outcome of the connection test.</returns>
Task<bool> TestConnectionAsync(CancellationToken ct = default);
/// <summary>Retrieves the last deployment time from the Galaxy Repository.</summary>
/// <param name="ct">Token to cancel the asynchronous operation.</param>
/// <returns>A task producing the last deployment time, which may be <see langword="null"/>.</returns>
Task<DateTime?> GetLastDeployTimeAsync(CancellationToken ct = default);
/// <summary>Retrieves the complete hierarchy of Galaxy objects from the repository.</summary>
/// <param name="ct">Token to cancel the asynchronous operation.</param>
/// <returns>A task producing one row per Galaxy object.</returns>
Task<List<GalaxyHierarchyRow>> GetHierarchyAsync(CancellationToken ct = default);
/// <summary>Retrieves all attributes for Galaxy objects from the repository.</summary>
/// <param name="ct">Token to cancel the asynchronous operation.</param>
/// <returns>A task producing one row per attribute.</returns>
Task<List<GalaxyAttributeRow>> GetAttributesAsync(CancellationToken ct = default);
}
@@ -0,0 +1,190 @@
syntax = "proto3";
package galaxy_repository.v1;
option csharp_namespace = "ZB.MOM.WW.GalaxyRepository.Grpc";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
// Wire-compatibility policy (ProtobufStyleGuide): this contract evolves
// additively only. Never renumber or repurpose an existing field number or
// enum value. When a field or enum value is removed, add a `reserved` range
// (and `reserved` name) covering it in the same change so a future editor
// cannot accidentally reuse the retired tag. There are no `reserved`
// declarations today because no field or enum value has ever been removed.
// Read-only browse over the AVEVA System Platform Galaxy Repository (ZB SQL
// database). Lets clients enumerate the deployed object hierarchy and each
// object's dynamic attributes so they know what tag references to subscribe
// to via the MxAccessGateway service.
service GalaxyRepository {
rpc TestConnection(TestConnectionRequest) returns (TestConnectionReply);
rpc GetLastDeployTime(GetLastDeployTimeRequest) returns (GetLastDeployTimeReply);
rpc DiscoverHierarchy(DiscoverHierarchyRequest) returns (DiscoverHierarchyReply);
// Server-stream of deploy events. The server emits the current state immediately
// on subscribe (so clients can bootstrap their cache without waiting for the next
// deploy), then emits one event each time the gateway's hierarchy cache observes
// a new galaxy.time_of_last_deploy. The sequence field is monotonically
// increasing per server start; gaps indicate the per-subscriber buffer dropped
// older events because the client was too slow.
rpc WatchDeployEvents(WatchDeployEventsRequest) returns (stream DeployEvent);
// Returns the direct children of a parent object (or the root objects when
// `parent` is unset). Designed for OPC UA-style lazy expand: clients walk
// one level at a time instead of paging the full hierarchy. Filters mirror
// DiscoverHierarchy exactly. Backed by the same shared hierarchy cache.
rpc BrowseChildren(BrowseChildrenRequest) returns (BrowseChildrenReply);
}
message TestConnectionRequest {}
message TestConnectionReply {
bool ok = 1;
}
message GetLastDeployTimeRequest {}
message GetLastDeployTimeReply {
bool present = 1;
google.protobuf.Timestamp time_of_last_deploy = 2;
}
message DiscoverHierarchyRequest {
// Maximum number of objects to return. Zero (i.e. unset) selects the server
// default; negative values are rejected with InvalidArgument; values above
// the server maximum are clamped to that maximum.
int32 page_size = 1;
// Opaque token returned by a previous DiscoverHierarchy response.
string page_token = 2;
// Optional. When set, return only this object and its descendants.
// Empty = full hierarchy.
oneof root {
int32 root_gobject_id = 3;
string root_tag_name = 4;
string root_contained_path = 5;
}
// Optional. Cap on descendant depth from root. Zero returns only the root.
// Unset means unlimited depth.
google.protobuf.Int32Value max_depth = 6;
// Optional object category id filters.
repeated int32 category_ids = 7;
// Optional case-insensitive substring filters against template names.
repeated string template_chain_contains = 8;
// Optional anchored, case-insensitive glob over object tag_name.
string tag_name_glob = 9;
// Optional. Unset or true includes attributes. False returns object skeletons.
optional bool include_attributes = 10;
// Optional. Return only objects with at least one alarm-bearing attribute.
bool alarm_bearing_only = 11;
// Optional. Return only objects with at least one historized attribute.
bool historized_only = 12;
}
message DiscoverHierarchyReply {
repeated GalaxyObject objects = 1;
// Non-empty when another page is available.
string next_page_token = 2;
// Total number of objects in the cached hierarchy at the time of the call.
int32 total_object_count = 3;
}
message WatchDeployEventsRequest {
// Optional. When set, the bootstrap event is suppressed if the cached deploy
// time matches this value. Future events are still emitted normally.
google.protobuf.Timestamp last_seen_deploy_time = 1;
}
message DeployEvent {
// Monotonically increasing per server start. Gaps indicate dropped events.
uint64 sequence = 1;
// Server wall-clock when the cache observed the deploy.
google.protobuf.Timestamp observed_at = 2;
// Galaxy.time_of_last_deploy. Absent only when the Galaxy table reports null.
google.protobuf.Timestamp time_of_last_deploy = 3;
bool time_of_last_deploy_present = 4;
int32 object_count = 5;
int32 attribute_count = 6;
}
message GalaxyObject {
int32 gobject_id = 1;
string tag_name = 2;
string contained_name = 3;
string browse_name = 4;
int32 parent_gobject_id = 5;
bool is_area = 6;
int32 category_id = 7;
int32 hosted_by_gobject_id = 8;
repeated string template_chain = 9;
repeated GalaxyAttribute attributes = 10;
}
message GalaxyAttribute {
string attribute_name = 1;
string full_tag_reference = 2;
// Raw Galaxy SQL `dbo.data_type` identifier, passed through unchanged.
// This is NOT a member of `mxaccess_gateway.v1.MxDataType` — Galaxy's
// type enumeration is distinct from MXAccess's wire data-type enum and
// the two must not be cast or compared. The GalaxyRepository service is
// metadata-only and deliberately does not share types with
// mxaccess_gateway.proto. See docs/GalaxyRepository.md.
int32 mx_data_type = 3;
// Human-readable name from Galaxy's `dbo.data_type` table (e.g. "Float",
// "Integer", "Boolean"). Free-form Galaxy text; not a stable enum.
string data_type_name = 4;
bool is_array = 5;
int32 array_dimension = 6;
bool array_dimension_present = 7;
// Raw Galaxy SQL attribute-category identifier, passed through unchanged.
// Galaxy-specific; not mapped to any gateway enum. See
// docs/GalaxyRepository.md.
int32 mx_attribute_category = 8;
// Raw Galaxy SQL security-classification identifier, passed through
// unchanged. Galaxy-specific; not mapped to any gateway enum. See
// docs/GalaxyRepository.md.
int32 security_classification = 9;
bool is_historized = 10;
bool is_alarm = 11;
}
message BrowseChildrenRequest {
// Parent selector. Empty oneof returns root objects (parent_gobject_id == 0).
oneof parent {
int32 parent_gobject_id = 1;
string parent_tag_name = 2;
string parent_contained_path = 3;
}
// Maximum number of direct children to return. Server default 500; cap 5000.
// Zero (i.e. unset) selects the default; negative values are rejected with
// InvalidArgument; values above the cap are clamped to it.
int32 page_size = 4;
// Opaque token returned by a previous BrowseChildren response. Bound to the
// cache sequence, parent selector, and the filter set; a mismatch returns
// InvalidArgument.
string page_token = 5;
// --- Filter parity with DiscoverHierarchy. AND-combined. ---
repeated int32 category_ids = 6;
repeated string template_chain_contains = 7;
string tag_name_glob = 8;
optional bool include_attributes = 9;
bool alarm_bearing_only = 10;
bool historized_only = 11;
}
message BrowseChildrenReply {
// Direct children matching the filter, sorted areas-first then by
// case-insensitive display name (same order as the dashboard tree).
repeated GalaxyObject children = 1;
// Non-empty when another page of siblings is available.
string next_page_token = 2;
// Total matching direct children of the parent (post-filter).
int32 total_child_count = 3;
// Parallel array, indexed with `children`. True when the child has at least
// one matching descendant under the same filter set. Lets a UI choose
// whether to draw an expand triangle without an extra round trip.
repeated bool child_has_children = 4;
// Cache sequence this reply was projected from. Informational: the server
// also embeds the same sequence in next_page_token, and a token issued
// under a different sequence is rejected with InvalidArgument.
uint64 cache_sequence = 5;
}
@@ -0,0 +1,30 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<IsPackable>true</IsPackable>
<PackageId>ZB.MOM.WW.GalaxyRepository</PackageId>
<Authors>ZB.MOM.WW</Authors>
<Description>Read-only Galaxy object-hierarchy browse library for the ZB.MOM.WW SCADA family. Provides a SQL provider for the Galaxy Repository database and a canonical gRPC service for exposing the hierarchy to modern .NET 10 clients — extracted from MxAccessGateway so any consumer can browse the Galaxy without loading 32-bit COM.</Description>
<PackageTags>galaxy;repository;browse;aveva;wonderware;system-platform;scada;grpc;sql;zb-mom-ww</PackageTags>
<PackageProjectUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-galaxyrepository</PackageProjectUrl>
<RepositoryUrl>https://gitea.dohertylan.com/dohertj2/zb-mom-ww-galaxyrepository</RepositoryUrl>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Data.SqlClient" />
<PackageReference Include="Grpc.AspNetCore" />
<PackageReference Include="Google.Protobuf" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
<PackageReference Include="Grpc.Tools">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>
<!-- Compiles every .proto file under Protos\ into server-side gRPC stubs (GrpcServices="Server"). An empty glob is also valid and builds cleanly. -->
<ItemGroup>
<Protobuf Include="Protos\*.proto" GrpcServices="Server" />
</ItemGroup>
</Project>
@@ -0,0 +1,134 @@
using System.Runtime.CompilerServices;
using ZB.MOM.WW.GalaxyRepository;
namespace ZB.MOM.WW.GalaxyRepository.Tests;
/// <summary>
/// Test double for <see cref="IGalaxyRepository"/> that serves fixed rowsets from memory.
/// It tallies how often the heavy hierarchy and attribute reads run (so deploy-gated skips
/// can be asserted) and can be switched into a failing mode via <see cref="ThrowOnQuery"/>.
/// </summary>
internal sealed class FakeGalaxyRepository(
    IReadOnlyList<GalaxyHierarchyRow> hierarchy,
    IReadOnlyList<GalaxyAttributeRow> attributes,
    DateTime? deployTime) : IGalaxyRepository
{
    /// <summary>The deploy time returned by <see cref="GetLastDeployTimeAsync"/>; mutate to simulate a redeploy.</summary>
    public DateTime? DeployTime { get; set; } = deployTime;

    /// <summary>When set, every query throws this exception (simulates an unreachable database).</summary>
    public Exception? ThrowOnQuery { get; set; }

    /// <summary>Number of <see cref="GetHierarchyAsync"/> calls that returned rows.</summary>
    public int HierarchyReadCount { get; private set; }

    /// <summary>Number of <see cref="GetAttributesAsync"/> calls that returned rows.</summary>
    public int AttributeReadCount { get; private set; }

    public Task<bool> TestConnectionAsync(CancellationToken ct = default)
    {
        ThrowIfFailureConfigured();
        return Task.FromResult(true);
    }

    public Task<DateTime?> GetLastDeployTimeAsync(CancellationToken ct = default)
    {
        ThrowIfFailureConfigured();
        return Task.FromResult(DeployTime);
    }

    public Task<List<GalaxyHierarchyRow>> GetHierarchyAsync(CancellationToken ct = default)
    {
        ThrowIfFailureConfigured();
        HierarchyReadCount += 1;
        return Task.FromResult(new List<GalaxyHierarchyRow>(hierarchy));
    }

    public Task<List<GalaxyAttributeRow>> GetAttributesAsync(CancellationToken ct = default)
    {
        ThrowIfFailureConfigured();
        AttributeReadCount += 1;
        return Task.FromResult(new List<GalaxyAttributeRow>(attributes));
    }

    /// <summary>Throws the configured failure synchronously, before any counter is touched.</summary>
    private void ThrowIfFailureConfigured()
    {
        if (ThrowOnQuery is { } failure)
        {
            throw failure;
        }
    }
}
/// <summary>
/// Test double for <see cref="IGalaxyDeployNotifier"/> that keeps every published event
/// in <see cref="Published"/> so tests can assert on publication.
/// </summary>
internal sealed class RecordingDeployNotifier : IGalaxyDeployNotifier
{
    /// <summary>Every event passed to <see cref="Publish"/>, in call order.</summary>
    public List<GalaxyDeployEventInfo> Published { get; } = new();

    public GalaxyDeployEventInfo? Latest { get; private set; }

    public void Publish(GalaxyDeployEventInfo info)
    {
        Latest = info;
        Published.Add(info);
    }

    /// <summary>Replays <see cref="Latest"/> (when there is one) and then completes; no live events are streamed.</summary>
    public async IAsyncEnumerable<GalaxyDeployEventInfo> SubscribeAsync(
        [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        if (Latest is { } replay)
        {
            yield return replay;
        }

        // Keeps this an async iterator with at least one await.
        await Task.CompletedTask.ConfigureAwait(false);
    }
}
/// <summary>
/// Memory-only <see cref="IGalaxyHierarchySnapshotStore"/>. Seed <see cref="Snapshot"/>
/// before the test to drive the restore path; inspect it (and the call counters)
/// afterwards to verify what <see cref="SaveAsync"/> persisted.
/// </summary>
internal sealed class FakeSnapshotStore : IGalaxyHierarchySnapshotStore
{
    /// <summary>The snapshot returned by <see cref="TryLoadAsync"/> and overwritten by <see cref="SaveAsync"/>.</summary>
    public GalaxyHierarchySnapshot? Snapshot { get; set; }

    /// <summary>Number of <see cref="SaveAsync"/> calls.</summary>
    public int SaveCount { get; private set; }

    /// <summary>Number of <see cref="TryLoadAsync"/> calls.</summary>
    public int LoadCount { get; private set; }

    public Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken)
    {
        SaveCount += 1;
        Snapshot = snapshot;
        return Task.CompletedTask;
    }

    public Task<GalaxyHierarchySnapshot?> TryLoadAsync(CancellationToken cancellationToken)
    {
        LoadCount += 1;
        GalaxyHierarchySnapshot? stored = Snapshot;
        return Task.FromResult(stored);
    }
}
/// <summary>
/// A <see cref="TimeProvider"/> with a manually driven UTC clock: it reports a fixed
/// instant until <see cref="Advance"/> moves it, which makes the cache's staleness
/// projection (which fires after a 5-minute threshold) deterministic.
/// </summary>
internal sealed class StubTimeProvider : TimeProvider
{
    private DateTimeOffset _current;

    /// <summary>Creates a clock that initially reports <paramref name="start"/>.</summary>
    public StubTimeProvider(DateTimeOffset start)
    {
        _current = start;
    }

    public override DateTimeOffset GetUtcNow()
    {
        return _current;
    }

    /// <summary>Moves the clock by <paramref name="delta"/>.</summary>
    public void Advance(TimeSpan delta)
    {
        _current = _current.Add(delta);
    }
}
@@ -0,0 +1,236 @@
using ZB.MOM.WW.GalaxyRepository;
namespace ZB.MOM.WW.GalaxyRepository.Tests;
/// <summary>
/// Tests for <see cref="GalaxyHierarchyCache"/> first-load, deploy-gating, snapshot
/// restore, persistence, and status-transition behavior. Uses an in-memory
/// <see cref="IGalaxyRepository"/> and snapshot store plus a fixed
/// <see cref="StubTimeProvider"/> so no SQL is touched and no asserts are time-sensitive.
/// </summary>
public sealed class GalaxyHierarchyCacheTests
{
    private static readonly DateTimeOffset FixedNow = new(2026, 1, 1, 12, 0, 0, TimeSpan.Zero);
    private static readonly DateTime DeployTime = new(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>
    /// Hang guard for first-load waits. The gate is expected to be open already, so
    /// this only bounds how long a regression can stall the test run.
    /// </summary>
    private static readonly TimeSpan FirstLoadTimeout = TimeSpan.FromSeconds(5);

    private static List<GalaxyHierarchyRow> SampleHierarchy() =>
    [
        new() { GobjectId = 1, TagName = "Area1", ContainedName = "Area1", BrowseName = "Area1", IsArea = true },
        new() { GobjectId = 2, TagName = "Pump01", ContainedName = "Pump01", BrowseName = "Pump01", ParentGobjectId = 1 },
    ];

    private static List<GalaxyAttributeRow> SampleAttributes() =>
    [
        new() { GobjectId = 2, AttributeName = "PV", FullTagReference = "Pump01.PV", IsHistorized = true, IsAlarm = true },
    ];

    /// <summary>
    /// Awaits the cache's first-load gate under <see cref="FirstLoadTimeout"/>. The
    /// timeout source is disposed when the wait ends, so its timer is not left running.
    /// </summary>
    /// <param name="cache">The cache whose first load is awaited.</param>
    private static async Task WaitForFirstLoadWithTimeoutAsync(GalaxyHierarchyCache cache)
    {
        using CancellationTokenSource timeout = new(FirstLoadTimeout);
        await cache.WaitForFirstLoadAsync(timeout.Token);
    }

    [Fact]
    public async Task RefreshAsync_FirstLoad_PopulatesCurrentWithDataAndUnblocksWaitForFirstLoad()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        RecordingDeployNotifier notifier = new();
        using GalaxyHierarchyCache cache = new(repository, notifier, new StubTimeProvider(FixedNow));

        // Before refresh, the gate is unset and there is no data.
        Assert.False(cache.Current.HasData);
        Assert.Equal(GalaxyCacheStatus.Unknown, cache.Current.Status);

        await cache.RefreshAsync(CancellationToken.None);

        // First load completes (does not hang) and Current now holds usable data.
        await WaitForFirstLoadWithTimeoutAsync(cache);
        GalaxyHierarchyCacheEntry current = cache.Current;
        Assert.True(current.HasData);
        Assert.Equal(GalaxyCacheStatus.Healthy, current.Status);
        Assert.Equal(2, current.ObjectCount);
        Assert.Equal(1, current.AreaCount);
        Assert.Equal(1, current.AttributeCount);
        Assert.Equal(1, current.HistorizedAttributeCount);
        Assert.Equal(1, current.AlarmAttributeCount);

        // The heavy queries ran exactly once and a deploy event was published.
        Assert.Equal(1, repository.HierarchyReadCount);
        Assert.Equal(1, repository.AttributeReadCount);
        GalaxyDeployEventInfo published = Assert.Single(notifier.Published);
        Assert.Equal(2, published.ObjectCount);
        Assert.Equal(1, published.AttributeCount);
    }

    [Fact]
    public async Task RefreshAsync_NoDeployChange_SkipsHeavyQueriesOnSecondRefresh()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        using GalaxyHierarchyCache cache = new(repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow));

        await cache.RefreshAsync(CancellationToken.None);
        await cache.RefreshAsync(CancellationToken.None);

        // Deploy time unchanged => the heavy hierarchy/attribute reads happened only once.
        Assert.Equal(1, repository.HierarchyReadCount);
        Assert.Equal(1, repository.AttributeReadCount);
        Assert.True(cache.Current.HasData);
        Assert.Equal(GalaxyCacheStatus.Healthy, cache.Current.Status);
    }

    [Fact]
    public async Task RefreshAsync_DeployAdvances_RebuildsAndBumpsSequence()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        RecordingDeployNotifier notifier = new();
        using GalaxyHierarchyCache cache = new(repository, notifier, new StubTimeProvider(FixedNow));

        await cache.RefreshAsync(CancellationToken.None);
        long firstSequence = cache.Current.Sequence;

        repository.DeployTime = DeployTime.AddHours(1);
        await cache.RefreshAsync(CancellationToken.None);

        Assert.Equal(2, repository.HierarchyReadCount);
        Assert.Equal(firstSequence + 1, cache.Current.Sequence);
        Assert.Equal(2, notifier.Published.Count);
    }

    [Fact]
    public async Task RefreshAsync_FirstQueryFailsNoPriorData_StatusUnavailableButFirstLoadStillCompletes()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime)
        {
            ThrowOnQuery = new TimeoutException("galaxy db unreachable"),
        };
        using GalaxyHierarchyCache cache = new(repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow));

        await cache.RefreshAsync(CancellationToken.None);

        // First load must complete so callers do not hang, even though the query failed.
        await WaitForFirstLoadWithTimeoutAsync(cache);
        Assert.False(cache.Current.HasData);
        Assert.Equal(GalaxyCacheStatus.Unavailable, cache.Current.Status);
        Assert.Contains("unreachable", cache.Current.LastError);
    }

    [Fact]
    public async Task RefreshAsync_QueryFailsAfterPriorData_DegradesToStaleAndKeepsData()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        using GalaxyHierarchyCache cache = new(repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow));

        await cache.RefreshAsync(CancellationToken.None);
        Assert.True(cache.Current.HasData);

        // A later refresh fails: data is retained but flagged Stale.
        repository.DeployTime = DeployTime.AddHours(1);
        repository.ThrowOnQuery = new InvalidOperationException("transient");
        await cache.RefreshAsync(CancellationToken.None);

        Assert.True(cache.Current.HasData);
        Assert.Equal(GalaxyCacheStatus.Stale, cache.Current.Status);
        Assert.Equal(2, cache.Current.ObjectCount);
    }

    [Fact]
    public async Task Current_AfterStalenessThreshold_ProjectsHealthyToStale()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        StubTimeProvider clock = new(FixedNow);
        using GalaxyHierarchyCache cache = new(repository, new RecordingDeployNotifier(), clock);

        await cache.RefreshAsync(CancellationToken.None);
        Assert.Equal(GalaxyCacheStatus.Healthy, cache.Current.Status);

        // Advance past the 5-minute staleness threshold with no successful refresh.
        clock.Advance(TimeSpan.FromMinutes(6));

        Assert.Equal(GalaxyCacheStatus.Stale, cache.Current.Status);
        // Data is still present — Stale means "old", not "gone".
        Assert.True(cache.Current.HasData);
    }

    [Fact]
    public async Task RefreshAsync_PersistsSnapshotAfterSuccessfulHeavyRefresh()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        FakeSnapshotStore store = new();
        using GalaxyHierarchyCache cache = new(
            repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow), logger: null, snapshotStore: store);

        await cache.RefreshAsync(CancellationToken.None);

        Assert.Equal(1, store.SaveCount);
        Assert.NotNull(store.Snapshot);
        Assert.Equal(2, store.Snapshot!.Hierarchy.Count);
        Assert.Single(store.Snapshot.Attributes);
    }

    [Fact]
    public async Task RefreshAsync_SnapshotRestore_ServesLastKnownDataAsStaleWhenDatabaseUnreachable()
    {
        // The snapshot store already holds a persisted dataset (last-known browse data).
        FakeSnapshotStore store = new()
        {
            Snapshot = new GalaxyHierarchySnapshot(
                LastDeployTime: DeployTime,
                SavedAt: FixedNow.AddMinutes(-1),
                Hierarchy: SampleHierarchy(),
                Attributes: SampleAttributes()),
        };

        // The Galaxy database is unreachable on this cold start.
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime)
        {
            ThrowOnQuery = new TimeoutException("cold start, db down"),
        };
        RecordingDeployNotifier notifier = new();
        using GalaxyHierarchyCache cache = new(
            repository, notifier, new StubTimeProvider(FixedNow), logger: null, snapshotStore: store);

        await cache.RefreshAsync(CancellationToken.None);

        // First load is satisfied by the restored snapshot, not by SQL.
        await WaitForFirstLoadWithTimeoutAsync(cache);
        Assert.Equal(1, store.LoadCount);
        GalaxyHierarchyCacheEntry current = cache.Current;
        Assert.True(current.HasData);
        // Restored data is "last-known", surfaced as Stale until the live DB confirms.
        Assert.Equal(GalaxyCacheStatus.Stale, current.Status);
        Assert.Equal(2, current.ObjectCount);
        Assert.Equal(DeployTime, current.LastDeployTime!.Value.UtcDateTime);
        // A deploy event was published for the restored data.
        Assert.Single(notifier.Published);
    }

    [Fact]
    public async Task RefreshAsync_SnapshotRestoreThenLiveQuery_PromotesRestoredDataToHealthy()
    {
        FakeSnapshotStore store = new()
        {
            Snapshot = new GalaxyHierarchySnapshot(
                LastDeployTime: DeployTime,
                SavedAt: FixedNow.AddMinutes(-1),
                Hierarchy: SampleHierarchy(),
                Attributes: SampleAttributes()),
        };

        // DB is reachable and reports the SAME deploy time the snapshot was pulled at.
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        using GalaxyHierarchyCache cache = new(
            repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow), logger: null, snapshotStore: store);

        await cache.RefreshAsync(CancellationToken.None);

        // Restore seeds Stale data; the same-deploy live query promotes it to Healthy
        // without re-running the heavy hierarchy/attribute reads.
        Assert.Equal(GalaxyCacheStatus.Healthy, cache.Current.Status);
        Assert.Equal(0, repository.HierarchyReadCount);
        Assert.True(cache.Current.HasData);
    }

    [Fact]
    public void Dispose_CanBeCalledWithoutHavingRefreshed()
    {
        FakeGalaxyRepository repository = new(SampleHierarchy(), SampleAttributes(), DeployTime);
        GalaxyHierarchyCache cache = new(repository, new RecordingDeployNotifier(), new StubTimeProvider(FixedNow));

        // Dispose must be safe even when no refresh ever ran (semaphore never entered).
        cache.Dispose();
    }
}
@@ -0,0 +1,458 @@
using Grpc.Core;
using ZB.MOM.WW.GalaxyRepository;
using ZB.MOM.WW.GalaxyRepository.Grpc;
namespace ZB.MOM.WW.GalaxyRepository.Tests;
/// <summary>
/// Pure-logic tests for <see cref="GalaxyHierarchyProjector"/> and
/// <see cref="GalaxyBrowseProjector"/>. No SQL: the cache entry under test is built
/// from a small hand-made hierarchy through the same materialization the live cache
/// uses (a fake <see cref="IGalaxyRepository"/> driven through
/// <see cref="GalaxyHierarchyCache.RefreshAsync"/>), so the projectors are exercised
/// against a real <see cref="GalaxyHierarchyIndex"/>.
/// </summary>
public sealed class GalaxyHierarchyProjectorTests
{
/// <summary>
/// Materializes a cache entry from hand-made rows by feeding them to a fake repository and
/// running the cache's own refresh. The entry is therefore produced by the same
/// <c>BuildEntry</c> + <see cref="GalaxyHierarchyIndex.Build"/> path production uses,
/// not by an internal test-only factory.
/// </summary>
/// <param name="hierarchy">Hierarchy rows the fake repository will serve.</param>
/// <param name="attributes">Attribute rows the fake repository will serve.</param>
/// <returns>The cache's current entry after one refresh; asserted to contain data.</returns>
private static GalaxyHierarchyCacheEntry BuildEntry(
    IReadOnlyList<GalaxyHierarchyRow> hierarchy,
    IReadOnlyList<GalaxyAttributeRow> attributes)
{
    var fixedDeployTime = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    var repository = new FakeGalaxyRepository(hierarchy, attributes, deployTime: fixedDeployTime);
    using var cache = new GalaxyHierarchyCache(repository, new RecordingDeployNotifier());

    // The callers are synchronous [Fact] methods, so the refresh is waited on by blocking.
    cache.RefreshAsync(CancellationToken.None).GetAwaiter().GetResult();

    GalaxyHierarchyCacheEntry materialized = cache.Current;
    Assert.True(materialized.HasData);
    return materialized;
}
// The sample galaxy every test in this class projects against:
//   PlantArea (area, id 1, category 100)
//   ├─ LineA (area, id 2, category 100)
//   │   ├─ Pump01  (id 10, category 200, templates "$Pump" + "$UserDefined"; PV is historized+alarm, SP is plain)
//   │   └─ Valve01 (id 11, category 201, template "$Valve"; plain attribute)
//   └─ Mixer01 (id 12, category 202, template "$Mixer"; alarm-only attribute)
//   StandaloneTank (id 20, category 203, template "$Tank"; parent 0 = root; historized-only attribute)
private static GalaxyHierarchyCacheEntry BuildSampleEntry()
{
    var hierarchyRows = new List<GalaxyHierarchyRow>
    {
        Hierarchy(1, "PlantArea", parent: 0, isArea: true, category: 100),
        Hierarchy(2, "LineA", parent: 1, isArea: true, category: 100),
        Hierarchy(10, "Pump01", parent: 2, category: 200, templates: ["$Pump", "$UserDefined"]),
        Hierarchy(11, "Valve01", parent: 2, category: 201, templates: ["$Valve"]),
        Hierarchy(12, "Mixer01", parent: 1, category: 202, templates: ["$Mixer"]),
        Hierarchy(20, "StandaloneTank", parent: 0, category: 203, templates: ["$Tank"]),
    };

    var attributeRows = new List<GalaxyAttributeRow>
    {
        // Pump01 carries one attribute that is both historized and alarm-bearing, plus a plain one.
        Attribute(10, "Pump01.PV", historized: true, alarm: true),
        Attribute(10, "Pump01.SP", historized: false, alarm: false),
        // Valve01 has neither flag.
        Attribute(11, "Valve01.Cmd", historized: false, alarm: false),
        // Mixer01 is alarm-bearing only.
        Attribute(12, "Mixer01.Fault", historized: false, alarm: true),
        // StandaloneTank is historized only.
        Attribute(20, "StandaloneTank.Level", historized: true, alarm: false),
    };

    return BuildEntry(hierarchyRows, attributeRows);
}
// Builds one hierarchy row for the sample galaxy. The single tagName is used for the
// tag, contained, and browse names alike; a missing template list becomes an empty chain.
private static GalaxyHierarchyRow Hierarchy(
    int id,
    string tagName,
    int parent,
    bool isArea = false,
    int category = 0,
    IReadOnlyList<string>? templates = null)
{
    var row = new GalaxyHierarchyRow
    {
        GobjectId = id,
        TagName = tagName,
        ContainedName = tagName,
        BrowseName = tagName,
        ParentGobjectId = parent,
        IsArea = isArea,
        CategoryId = category,
        TemplateChain = templates ?? Array.Empty<string>(),
    };
    return row;
}
// Builds one attribute row. The attribute name is whatever follows the last '.' of the
// full tag reference (the whole reference when it contains no '.').
private static GalaxyAttributeRow Attribute(
    int gobjectId,
    string fullTagReference,
    bool historized,
    bool alarm)
{
    // LastIndexOf yields -1 when there is no '.', so the slice then starts at index 0.
    string attributeName = fullTagReference[(fullTagReference.LastIndexOf('.') + 1)..];

    return new GalaxyAttributeRow
    {
        GobjectId = gobjectId,
        AttributeName = attributeName,
        FullTagReference = fullTagReference,
        IsHistorized = historized,
        IsAlarm = alarm,
    };
}
[Fact]
public void Project_NoFilters_ReturnsEveryObject()
{
    // A default request carries no filters, so all six sample objects are expected back.
    var unfiltered = new DiscoverHierarchyRequest();

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), unfiltered);

    Assert.Equal(6, projection.TotalObjectCount);
    Assert.Equal(6, projection.Objects.Count);
}
[Fact]
public void Project_PageSizeAndOffset_SlicesTheOrderedResult()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    DiscoverHierarchyRequest request = new();

    // Reference ordering: everything in a single, effectively unbounded page.
    GalaxyHierarchyQueryResult full = GalaxyHierarchyProjector.Project(entry, request, browseSubtreeGlobs: null, offset: 0, pageSize: int.MaxValue);

    // Six sample objects split into three consecutive pages of two.
    GalaxyHierarchyQueryResult[] pages =
    [
        GalaxyHierarchyProjector.Project(entry, request, browseSubtreeGlobs: null, offset: 0, pageSize: 2),
        GalaxyHierarchyProjector.Project(entry, request, browseSubtreeGlobs: null, offset: 2, pageSize: 2),
        GalaxyHierarchyProjector.Project(entry, request, browseSubtreeGlobs: null, offset: 4, pageSize: 2),
    ];

    foreach (GalaxyHierarchyQueryResult page in pages)
    {
        // Total is unaffected by paging — checked on every page, not only the first.
        Assert.Equal(6, page.TotalObjectCount);
        Assert.Equal(2, page.Objects.Count);
    }

    // The three pages reconstruct the full ordered result with no gaps/dupes.
    IEnumerable<int> paged = pages.SelectMany(page => page.Objects).Select(o => o.GobjectId);
    Assert.Equal(full.Objects.Select(o => o.GobjectId), paged);
}
[Fact]
public void Project_OffsetPastEnd_ReturnsEmptyPageButRealTotal()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var unfiltered = new DiscoverHierarchyRequest();

    // Offset 999 lies far beyond the six sample objects.
    GalaxyHierarchyQueryResult beyondEnd = GalaxyHierarchyProjector.Project(
        entry,
        unfiltered,
        browseSubtreeGlobs: null,
        offset: 999,
        pageSize: 10);

    // The page itself is empty, yet the total still reports the real object count.
    Assert.Empty(beyondEnd.Objects);
    Assert.Equal(6, beyondEnd.TotalObjectCount);
}
[Fact]
public void Project_PageSignature_IsStableAcrossPagesAndMatchesComputeFilterSignature()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var pumpFilter = new DiscoverHierarchyRequest { TagNameGlob = "Pump*" };

    // The signature a caller computes up front in order to mint a page token.
    string callerSignature = GalaxyHierarchyProjector.ComputeFilterSignature(pumpFilter, browseSubtreeGlobs: null);

    GalaxyHierarchyQueryResult firstPage = GalaxyHierarchyProjector.Project(
        entry, pumpFilter, browseSubtreeGlobs: null, offset: 0, pageSize: 1);
    GalaxyHierarchyQueryResult secondPage = GalaxyHierarchyProjector.Project(
        entry, pumpFilter, browseSubtreeGlobs: null, offset: 1, pageSize: 1);

    // It round-trips: the projector reports that same signature on every page of the
    // same filter set.
    foreach (GalaxyHierarchyQueryResult page in new[] { firstPage, secondPage })
    {
        Assert.Equal(callerSignature, page.FilterSignature);
    }
}
[Fact]
public void ComputeFilterSignature_DiffersWhenAnyFilterChanges()
{
    var pumpGlob = new DiscoverHierarchyRequest { TagNameGlob = "Pump*" };
    var valveGlob = new DiscoverHierarchyRequest { TagNameGlob = "Valve*" };
    var pumpGlobAlarmOnly = new DiscoverHierarchyRequest { TagNameGlob = "Pump*", AlarmBearingOnly = true };

    string reference = GalaxyHierarchyProjector.ComputeFilterSignature(pumpGlob, null);
    string withOtherGlob = GalaxyHierarchyProjector.ComputeFilterSignature(valveGlob, null);
    string withAlarmFlag = GalaxyHierarchyProjector.ComputeFilterSignature(pumpGlobAlarmOnly, null);
    string withSubtreeGlobs = GalaxyHierarchyProjector.ComputeFilterSignature(pumpGlob, browseSubtreeGlobs: ["PlantArea/*"]);

    // A different glob, an extra flag, or a subtree restriction each change the signature.
    Assert.NotEqual(reference, withOtherGlob);
    Assert.NotEqual(reference, withAlarmFlag);
    Assert.NotEqual(reference, withSubtreeGlobs);

    // A freshly built but identical request reproduces the signature (deterministic).
    string recomputed = GalaxyHierarchyProjector.ComputeFilterSignature(new DiscoverHierarchyRequest { TagNameGlob = "Pump*" }, null);
    Assert.Equal(reference, recomputed);
}
[Fact]
public void Project_MaxDepthZero_FromRoot_ReturnsOnlyTheRoot()
{
    // Root at PlantArea(1) with depth 0: the root itself and nothing beneath it.
    var rootOnly = new DiscoverHierarchyRequest { RootGobjectId = 1, MaxDepth = 0 };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), rootOnly);

    GalaxyObject root = Assert.Single(projection.Objects);
    Assert.Equal(1, root.GobjectId);
}
[Fact]
public void Project_MaxDepthOne_FromRoot_ReturnsRootAndDirectChildrenOnly()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // Depth 0 is PlantArea(1); depth 1 is LineA(2) and Mixer01(12). Pump01 and Valve01
    // sit at depth 2 and must be cut off.
    var oneLevel = new DiscoverHierarchyRequest { RootGobjectId = 1, MaxDepth = 1 };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(entry, oneLevel);

    var returnedIds = projection.Objects.Select(o => o.GobjectId).OrderBy(id => id);
    Assert.Equal([1, 2, 12], returnedIds);
}
[Fact]
public void Project_NegativeMaxDepth_ThrowsInvalidArgument()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var negativeDepth = new DiscoverHierarchyRequest { MaxDepth = -1 };

    Action project = () => GalaxyHierarchyProjector.Project(entry, negativeDepth);

    // A negative depth is rejected as a gRPC InvalidArgument.
    RpcException rejection = Assert.Throws<RpcException>(project);
    Assert.Equal(StatusCode.InvalidArgument, rejection.StatusCode);
}
[Fact]
public void Project_UnknownRoot_ThrowsNotFound()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // No sample object has id 99999.
    var missingRoot = new DiscoverHierarchyRequest { RootGobjectId = 99999 };

    Action project = () => GalaxyHierarchyProjector.Project(entry, missingRoot);

    RpcException rejection = Assert.Throws<RpcException>(project);
    Assert.Equal(StatusCode.NotFound, rejection.StatusCode);
}
[Fact]
public void Project_HistorizedOnly_ReturnsOnlyObjectsWithAHistorizedAttribute()
{
    var historizedOnly = new DiscoverHierarchyRequest { HistorizedOnly = true };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), historizedOnly);

    // Only Pump01(10) and StandaloneTank(20) own a historized attribute in the sample.
    var returnedIds = projection.Objects.Select(o => o.GobjectId).OrderBy(id => id);
    Assert.Equal([10, 20], returnedIds);
}
[Fact]
public void Project_AlarmBearingOnly_ReturnsOnlyObjectsWithAnAlarmAttribute()
{
    var alarmOnly = new DiscoverHierarchyRequest { AlarmBearingOnly = true };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), alarmOnly);

    // Only Pump01(10) and Mixer01(12) own an alarm attribute in the sample.
    var returnedIds = projection.Objects.Select(o => o.GobjectId).OrderBy(id => id);
    Assert.Equal([10, 12], returnedIds);
}
[Fact]
public void Project_AlarmAndHistorizedTogether_RequiresBoth()
{
    var bothFlags = new DiscoverHierarchyRequest { AlarmBearingOnly = true, HistorizedOnly = true };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), bothFlags);

    // Pump01(10) is the one sample object whose attributes cover both historized and alarm.
    GalaxyObject match = Assert.Single(projection.Objects);
    Assert.Equal(10, match.GobjectId);
}
[Fact]
public void Project_TagNameGlob_MatchesAnchoredCaseInsensitive()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // Projects with just a tag-name glob and returns the ids of the matching objects.
    IEnumerable<int> IdsMatching(string glob) =>
        GalaxyHierarchyProjector
            .Project(entry, new DiscoverHierarchyRequest { TagNameGlob = glob })
            .Objects
            .Select(o => o.GobjectId);

    // '*' wildcard used as a prefix match.
    Assert.Equal([10], IdsMatching("Pump*"));

    // Matching ignores case.
    Assert.Equal([10], IdsMatching("pump01"));

    // '?' stands for exactly one character: "Pump0?" matches "Pump01".
    Assert.Equal([10], IdsMatching("Pump0?"));

    // Anchored: a bare substring that is not a prefix matches nothing.
    Assert.Empty(IdsMatching("ump01"));
}
[Fact]
public void Project_CategoryIds_FilterByObjectCategory()
{
    var pumpAndValveCategories = new DiscoverHierarchyRequest { CategoryIds = { 200, 201 } };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), pumpAndValveCategories);

    // In the sample, category 200 is Pump01(10) and category 201 is Valve01(11).
    var returnedIds = projection.Objects.Select(o => o.GobjectId).OrderBy(id => id);
    Assert.Equal([10, 11], returnedIds);
}
[Fact]
public void Project_TemplateChainContains_IsSubstringAndCaseInsensitive()
{
    // Lower-case "pump" has to match the "$Pump" template: a substring, ignoring case.
    var containsPump = new DiscoverHierarchyRequest { TemplateChainContains = { "pump" } };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), containsPump);

    GalaxyObject match = Assert.Single(projection.Objects);
    Assert.Equal(10, match.GobjectId);
}
[Fact]
public void Project_IncludeAttributesDefault_CarriesAttributes()
{
    // IncludeAttributes is deliberately left unset here.
    var pumpFilter = new DiscoverHierarchyRequest { TagNameGlob = "Pump*" };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), pumpFilter);

    // Pump01 was given two attributes in the sample (PV and SP); both must come back.
    GalaxyObject pump = Assert.Single(projection.Objects);
    Assert.Equal(2, pump.Attributes.Count);
}
[Fact]
public void Project_IncludeAttributesFalse_ReturnsSkeletons()
{
    var skeletonsOnly = new DiscoverHierarchyRequest { TagNameGlob = "Pump*", IncludeAttributes = false };

    GalaxyHierarchyQueryResult projection = GalaxyHierarchyProjector.Project(BuildSampleEntry(), skeletonsOnly);

    // The object is still returned, but with its attribute list stripped.
    GalaxyObject pump = Assert.Single(projection.Objects);
    Assert.Empty(pump.Attributes);
}
[Fact]
public void Project_IncludeAttributesFalse_DoesNotMutateTheCachedEntry()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var stripped = new DiscoverHierarchyRequest { TagNameGlob = "Pump*", IncludeAttributes = false };
    var withAttributes = new DiscoverHierarchyRequest { TagNameGlob = "Pump*" };

    // First project with attributes stripped, then project the SAME entry with them included.
    _ = GalaxyHierarchyProjector.Project(entry, stripped);
    GalaxyHierarchyQueryResult included = GalaxyHierarchyProjector.Project(entry, withAttributes);

    // Had the first call stripped the cached object in place, Pump01 would now come back
    // without attributes; it must still carry both.
    GalaxyObject pump = Assert.Single(included.Objects);
    Assert.Equal(2, pump.Attributes.Count);
}
[Fact]
public void Project_InvalidOffsetOrPageSize_Throws()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // A negative offset is rejected.
    Action negativeOffset = () =>
        GalaxyHierarchyProjector.Project(entry, new DiscoverHierarchyRequest(), null, offset: -1, pageSize: 10);
    // So is a page size of zero.
    Action zeroPageSize = () =>
        GalaxyHierarchyProjector.Project(entry, new DiscoverHierarchyRequest(), null, offset: 0, pageSize: 0);

    Assert.Throws<ArgumentOutOfRangeException>(negativeOffset);
    Assert.Throws<ArgumentOutOfRangeException>(zeroPageSize);
}
// ---- GalaxyBrowseProjector ----
[Fact]
public void ProjectChildren_OfPlantArea_ReturnsDirectChildrenAreasFirst()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var underPlantArea = new BrowseChildrenRequest { ParentGobjectId = 1 };

    GalaxyBrowseChildrenResult browse = GalaxyBrowseProjector.ProjectChildren(
        entry, underPlantArea, browseSubtreeGlobs: null, offset: 0, pageSize: 100);

    // PlantArea(1) has two direct children: LineA(2), an area, and Mixer01(12), a
    // non-area. Areas are expected ahead of non-areas.
    var childIds = browse.Children.Select(c => c.GobjectId);
    Assert.Equal([2, 12], childIds);
    Assert.Equal(2, browse.TotalChildCount);
}
[Fact]
public void ProjectChildren_ChildHasChildrenFlag_ReflectsPresenceOfChildren()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();
    var underPlantArea = new BrowseChildrenRequest { ParentGobjectId = 1 };

    GalaxyBrowseChildrenResult browse = GalaxyBrowseProjector.ProjectChildren(
        entry, underPlantArea, browseSubtreeGlobs: null, offset: 0, pageSize: 100);

    // ChildHasChildren is read by position: flag i is paired with child i.
    Dictionary<int, bool> flagByChildId = browse.Children
        .Select((child, position) => (Id: child.GobjectId, HasChildren: browse.ChildHasChildren[position]))
        .ToDictionary(pair => pair.Id, pair => pair.HasChildren);

    // LineA(2) holds Pump01 and Valve01, so its flag is set; Mixer01(12) is a leaf.
    Assert.True(flagByChildId[2]);
    Assert.False(flagByChildId[12]);
}
[Fact]
public void ProjectChildren_OfRoot_ReturnsTopLevelObjects()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // Leaving the parent oneof empty asks for the roots (parent id 0).
    var atRoot = new BrowseChildrenRequest();

    GalaxyBrowseChildrenResult browse = GalaxyBrowseProjector.ProjectChildren(
        entry, atRoot, browseSubtreeGlobs: null, offset: 0, pageSize: 100);

    // The roots are PlantArea(1), an area, and StandaloneTank(20), a non-area; areas first.
    var rootIds = browse.Children.Select(c => c.GobjectId);
    Assert.Equal([1, 20], rootIds);
}
[Fact]
public void ProjectChildren_FilterMatchingDescendant_SurfacesNonMatchingAncestor()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // Pump01 sits two levels below PlantArea. When PlantArea's children are browsed with
    // a Pump glob, LineA does not match the glob itself but must still be surfaced,
    // because a matching descendant lives underneath it.
    var pumpsUnderPlantArea = new BrowseChildrenRequest { ParentGobjectId = 1, TagNameGlob = "Pump*" };

    GalaxyBrowseChildrenResult browse = GalaxyBrowseProjector.ProjectChildren(
        entry, pumpsUnderPlantArea, browseSubtreeGlobs: null, offset: 0, pageSize: 100);

    GalaxyObject ancestor = Assert.Single(browse.Children);
    Assert.Equal(2, ancestor.GobjectId);
    Assert.True(browse.ChildHasChildren[0]);
}
[Fact]
public void ProjectChildren_UnknownParent_ThrowsNotFound()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // No sample object has id 99999.
    var missingParent = new BrowseChildrenRequest { ParentGobjectId = 99999 };

    Action browse = () => GalaxyBrowseProjector.ProjectChildren(entry, missingParent, null, 0, 100);

    RpcException rejection = Assert.Throws<RpcException>(browse);
    Assert.Equal(StatusCode.NotFound, rejection.StatusCode);
}
[Fact]
public void ProjectChildren_Paging_SlicesAndPreservesTotal()
{
    GalaxyHierarchyCacheEntry entry = BuildSampleEntry();

    // LineA(2) has exactly two direct children, Pump01 and Valve01: one per page here.
    var underLineA = new BrowseChildrenRequest { ParentGobjectId = 2 };

    GalaxyBrowseChildrenResult firstPage = GalaxyBrowseProjector.ProjectChildren(entry, underLineA, null, offset: 0, pageSize: 1);
    GalaxyBrowseChildrenResult secondPage = GalaxyBrowseProjector.ProjectChildren(entry, underLineA, null, offset: 1, pageSize: 1);

    Assert.Equal(2, firstPage.TotalChildCount);
    GalaxyObject firstChild = Assert.Single(firstPage.Children);
    GalaxyObject secondChild = Assert.Single(secondPage.Children);
    Assert.NotEqual(firstChild.GobjectId, secondChild.GobjectId);

    // Identical filter and parent, so both pages must report one and the same signature.
    Assert.Equal(firstPage.FilterSignature, secondPage.FilterSignature);
}
[Fact]
public void ResolveParentId_ByTagName_ResolvesToGobjectId()
{
    // The parent is addressed by tag name rather than by id.
    var byTagName = new BrowseChildrenRequest { ParentTagName = "LineA" };

    int resolvedId = GalaxyBrowseProjector.ResolveParentId(BuildSampleEntry(), byTagName);

    // LineA is gobject 2 in the sample galaxy.
    Assert.Equal(2, resolvedId);
}
}
@@ -0,0 +1,84 @@
using Microsoft.Extensions.Options;
using ZB.MOM.WW.GalaxyRepository;
namespace ZB.MOM.WW.GalaxyRepository.Tests;
/// <summary>
/// Exercises the real <see cref="GalaxyHierarchySnapshotStore"/> against a uniquely named
/// file under the temp directory: a save/load round trip, the no-op behaviour when
/// persistence is switched off, and the two cases where nothing usable is on disk.
/// </summary>
public sealed class GalaxyHierarchySnapshotStoreTests : IDisposable
{
    // A GUID in the file name keeps each instance of this class on its own file.
    private readonly string _path = Path.Combine(
        Path.GetTempPath(),
        $"galaxyrepo-snap-{Guid.NewGuid():N}.json");

    /// <summary>Removes the snapshot file if a test left one behind.</summary>
    public void Dispose()
    {
        if (!File.Exists(_path))
        {
            return;
        }

        File.Delete(_path);
    }

    // Creates a store pointed at this instance's temp file, with persistence on or off.
    private GalaxyHierarchySnapshotStore CreateStore(bool persistSnapshot)
    {
        var options = new GalaxyRepositoryOptions { PersistSnapshot = persistSnapshot, SnapshotCachePath = _path };
        return new GalaxyHierarchySnapshotStore(Options.Create(options));
    }

    // A two-object snapshot (an area plus a pump beneath it) with one historized attribute.
    private static GalaxyHierarchySnapshot SampleSnapshot()
    {
        return new GalaxyHierarchySnapshot(
            LastDeployTime: new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero),
            SavedAt: new DateTimeOffset(2026, 1, 1, 12, 0, 0, TimeSpan.Zero),
            Hierarchy:
            [
                new GalaxyHierarchyRow { GobjectId = 1, TagName = "Area1", IsArea = true },
                new GalaxyHierarchyRow { GobjectId = 2, TagName = "Pump01", ParentGobjectId = 1 },
            ],
            Attributes:
            [
                new GalaxyAttributeRow { GobjectId = 2, AttributeName = "PV", FullTagReference = "Pump01.PV", IsHistorized = true },
            ]);
    }

    [Fact]
    public async Task SaveThenLoad_RoundTripsTheSnapshot()
    {
        using GalaxyHierarchySnapshotStore store = CreateStore(persistSnapshot: true);

        await store.SaveAsync(SampleSnapshot(), CancellationToken.None);
        GalaxyHierarchySnapshot? reloaded = await store.TryLoadAsync(CancellationToken.None);

        // Both hierarchy rows, the single attribute and the deploy time survive the trip.
        Assert.NotNull(reloaded);
        Assert.Equal(2, reloaded!.Hierarchy.Count);
        Assert.Single(reloaded.Attributes);
        Assert.Equal("Pump01.PV", reloaded.Attributes[0].FullTagReference);
        Assert.True(reloaded.Attributes[0].IsHistorized);
        Assert.Equal(SampleSnapshot().LastDeployTime, reloaded.LastDeployTime);
    }

    [Fact]
    public async Task SaveAndLoad_AreNoOps_WhenPersistenceDisabled()
    {
        using GalaxyHierarchySnapshotStore store = CreateStore(persistSnapshot: false);

        await store.SaveAsync(SampleSnapshot(), CancellationToken.None);

        // With persistence off, saving writes no file and loading finds nothing.
        Assert.False(File.Exists(_path));
        Assert.Null(await store.TryLoadAsync(CancellationToken.None));
    }

    [Fact]
    public async Task TryLoad_ReturnsNull_WhenNoFileExists()
    {
        using GalaxyHierarchySnapshotStore store = CreateStore(persistSnapshot: true);

        GalaxyHierarchySnapshot? reloaded = await store.TryLoadAsync(CancellationToken.None);

        Assert.Null(reloaded);
    }

    [Fact]
    public async Task TryLoad_ReturnsNull_WhenFileIsNotValidJson()
    {
        // Put unparseable content on disk before the store is created.
        await File.WriteAllTextAsync(_path, "{ this is not valid json");
        using GalaxyHierarchySnapshotStore store = CreateStore(persistSnapshot: true);

        GalaxyHierarchySnapshot? reloaded = await store.TryLoadAsync(CancellationToken.None);

        // The corrupt file yields null rather than an exception.
        Assert.Null(reloaded);
    }
}
@@ -0,0 +1,25 @@
<!-- Test project for ZB.MOM.WW.GalaxyRepository: references the library under src and runs its tests with xUnit. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<IsPackable>false</IsPackable>
<!-- Test project does not ship; no XML docs required (overrides Directory.Build.props). -->
<GenerateDocumentationFile>false</GenerateDocumentationFile>
</PropertyGroup>
<!-- No Version attributes below. NOTE(review): presumably the versions are supplied by central package management; confirm against the repository's package-version props file. -->
<ItemGroup>
<PackageReference Include="coverlet.collector" />
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio" />
<!-- NOTE(review): a direct SqlClient reference in a test project; confirm that a test still needs it. -->
<PackageReference Include="Microsoft.Data.SqlClient" />
</ItemGroup>
<!-- Project-wide using for the Xunit namespace, so test files can use [Fact] and Assert without their own using directive. -->
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\ZB.MOM.WW.GalaxyRepository\ZB.MOM.WW.GalaxyRepository.csproj" />
</ItemGroup>
</Project>
+5
View File
@@ -7,6 +7,11 @@
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
<Version>0.1.0</Version> <Version>0.1.0</Version>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally> <ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
<!-- Emit XML docs so the public API summaries ship inside the packed nupkgs (IntelliSense for
consumers). CS1591 (missing doc on a public member) is suppressed so undocumented test /
non-packed members do not break the build; the src public surface is fully documented. -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<NoWarn>$(NoWarn);CS1591</NoWarn>
</PropertyGroup> </PropertyGroup>
</Project> </Project>
@@ -103,27 +103,41 @@ public sealed class ActiveNodeHealthCheck : IHealthCheck
if (system is null) if (system is null)
return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available.")); return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
var cluster = Cluster.Get(system); MemberStatus selfStatus;
var self = cluster.SelfMember; bool selfUp;
var selfUp = self.Status == MemberStatus.Up;
bool hasRole; bool hasRole;
bool isLeader; bool isLeader;
if (_role is null) try
{ {
hasRole = false; // Reading cluster membership can throw while the ActorSystem exists but the cluster has
var leader = cluster.State.Leader; // not finished initialising (e.g. Akka.Cluster not yet configured →
isLeader = leader is not null && leader == self.Address; // ConfigurationException). The spec's startup-safety rule maps this to Degraded rather
// than letting the exception escape (which the host would record as Unhealthy).
var cluster = Cluster.Get(system);
var self = cluster.SelfMember;
selfStatus = self.Status;
selfUp = selfStatus == MemberStatus.Up;
if (_role is null)
{
hasRole = false;
var leader = cluster.State.Leader;
isLeader = leader is not null && leader == self.Address;
}
else
{
hasRole = self.HasRole(_role);
var roleLeader = cluster.State.RoleLeader(_role);
isLeader = roleLeader is not null && roleLeader == self.Address;
}
} }
else catch (Exception ex) when (ex is not OperationCanceledException)
{ {
hasRole = self.HasRole(_role); return Task.FromResult(HealthCheckResult.Degraded("Akka cluster state not yet accessible.", ex));
var roleLeader = cluster.State.RoleLeader(_role);
isLeader = roleLeader is not null && roleLeader == self.Address;
} }
var health = ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, _role); var health = ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, _role);
var description = DescribeResult(health, self.Status, selfUp, isLeader); var description = DescribeResult(health, selfStatus, selfUp, isLeader);
var result = health switch var result = health switch
{ {
HealthStatus.Healthy => HealthCheckResult.Healthy(description), HealthStatus.Healthy => HealthCheckResult.Healthy(description),
@@ -8,7 +8,8 @@ namespace ZB.MOM.WW.Health.Akka;
/// <summary> /// <summary>
/// Health check that maps the local node's Akka cluster membership status to a /// Health check that maps the local node's Akka cluster membership status to a
/// <see cref="HealthStatus"/> through a configurable <see cref="AkkaClusterStatusPolicy"/>. /// <see cref="HealthStatus"/> through a configurable <see cref="AkkaClusterStatusPolicy"/>.
/// Register to the <see cref="ZbHealthTags.Ready"/> tag (recommended <c>[ready, active]</c>). /// Register to the <see cref="ZbHealthTags.Ready"/> tag only — cluster membership is a readiness
/// concern; the <see cref="ZbHealthTags.Active"/> tier is reserved for the leader / active-node probe.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// The <see cref="ActorSystem"/> is resolved lazily from the service provider. If it is not yet /// The <see cref="ActorSystem"/> is resolved lazily from the service provider. If it is not yet
@@ -42,7 +43,21 @@ public sealed class AkkaClusterHealthCheck : IHealthCheck
if (system is null) if (system is null)
return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available.")); return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
var status = Cluster.Get(system).SelfMember.Status; MemberStatus status;
try
{
// Cluster.Get(system).SelfMember can throw while the ActorSystem exists but the cluster
// has not finished initialising (e.g. Akka.Cluster not yet configured →
// ConfigurationException). The spec's startup-safety rule maps this to Degraded, not an
// escaping exception (which the host would record as Unhealthy and pull the node from
// rotation).
status = Cluster.Get(system).SelfMember.Status;
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
return Task.FromResult(HealthCheckResult.Degraded("Akka cluster state not yet accessible.", ex));
}
var health = _policy.Evaluate(status); var health = _policy.Evaluate(status);
var description = $"Akka cluster member status: {status}"; var description = $"Akka cluster member status: {status}";
var result = health switch var result = health switch
@@ -13,14 +13,15 @@ namespace ZB.MOM.WW.Health;
/// The probe is injectable via <see cref="GrpcDependencyOptions.Probe"/>; the default drives the /// The probe is injectable via <see cref="GrpcDependencyOptions.Probe"/>; the default drives the
/// channel to a connected state with <see cref="GrpcChannel.ConnectAsync"/>. The result is /// channel to a connected state with <see cref="GrpcChannel.ConnectAsync"/>. The result is
/// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>, and /// <see cref="HealthStatus.Healthy"/> when the probe returns <c>true</c>, and
/// <see cref="HealthStatus.Unhealthy"/> when it returns <c>false</c>, throws an /// <see cref="HealthStatus.Unhealthy"/> when it returns <c>false</c>, throws any exception
/// <see cref="RpcException"/>, or times out / is cancelled within /// (<see cref="RpcException"/> or otherwise), or times out within
/// <see cref="GrpcDependencyOptions.Timeout"/>. /// <see cref="GrpcDependencyOptions.Timeout"/>. External cancellation of the supplied
/// <see cref="CancellationToken"/> propagates as an <see cref="OperationCanceledException"/>.
/// </para> /// </para>
/// <para> /// <para>
/// Recommended registration tags: <see cref="ZbHealthTags.Ready"/> and /// Recommended registration tag: <see cref="ZbHealthTags.Ready"/> only — downstream gRPC
/// <see cref="ZbHealthTags.Active"/> — a missing downstream gRPC dependency makes the node both /// reachability is a readiness concern; the <see cref="ZbHealthTags.Active"/> tier is reserved for
/// not-ready and not-able-to-act. The registrant applies the tags. /// the leader / active-node probe. The registrant applies the tag.
/// </para> /// </para>
/// </remarks> /// </remarks>
public sealed class GrpcDependencyHealthCheck : IHealthCheck public sealed class GrpcDependencyHealthCheck : IHealthCheck
@@ -74,6 +75,15 @@ public sealed class GrpcDependencyHealthCheck : IHealthCheck
{ {
return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex); return HealthCheckResult.Unhealthy($"{name} probe timed out after {_options.Timeout}.", ex);
} }
catch (Exception ex)
{
// Catch-all to match the sibling DatabaseHealthCheck: any other probe error
// (e.g. InvalidOperationException / HttpRequestException / SocketException from the
// transport, or anything a custom probe throws) maps to Unhealthy rather than escaping
// the IHealthCheck boundary. The OCE/Rpc external-cancellation handlers above run first,
// so caller cancellation still propagates.
return HealthCheckResult.Unhealthy($"{name} probe failed: {ex.Message}", ex);
}
} }
/// <summary> /// <summary>
@@ -28,9 +28,9 @@ public static class ZbHealthEndpointExtensions
/// emits a minimal <c>200 OK</c> body. /// emits a minimal <c>200 OK</c> body.
/// </remarks> /// </remarks>
/// <returns> /// <returns>
/// The <see cref="IEndpointConventionBuilder"/> for the readiness (<c>/health/ready</c>) endpoint. /// A composite <see cref="IEndpointConventionBuilder"/> that fans every chained convention out to
/// A single tier is returned (rather than a composite) to keep the API simple; conventions /// <em>all three</em> health endpoints (readiness, active, and liveness). For example,
/// applied to the result affect only the readiness endpoint. /// <c>endpoints.MapZbHealth().RequireHost("…")</c> gates all three endpoints, as a caller expects.
/// </returns> /// </returns>
public static IEndpointConventionBuilder MapZbHealth( public static IEndpointConventionBuilder MapZbHealth(
this IEndpointRouteBuilder endpoints, this IEndpointRouteBuilder endpoints,
@@ -47,7 +47,7 @@ public static class ZbHealthEndpointExtensions
ResponseWriter = responseWriter, ResponseWriter = responseWriter,
}).AllowAnonymous(); }).AllowAnonymous();
endpoints.MapHealthChecks(options.ActivePath, new HealthCheckOptions var active = endpoints.MapHealthChecks(options.ActivePath, new HealthCheckOptions
{ {
Predicate = static c => c.Tags.Contains(ZbHealthTags.Active), Predicate = static c => c.Tags.Contains(ZbHealthTags.Active),
ResponseWriter = responseWriter, ResponseWriter = responseWriter,
@@ -56,12 +56,38 @@ public static class ZbHealthEndpointExtensions
// Liveness: run no checks. The endpoint returns 200 as long as the process can respond. // Liveness: run no checks. The endpoint returns 200 as long as the process can respond.
// No JSON writer — the empty report would carry no useful data, so the framework default // No JSON writer — the empty report would carry no useful data, so the framework default
// (a minimal plain-text body) is sufficient. // (a minimal plain-text body) is sufficient.
endpoints.MapHealthChecks(options.LivePath, new HealthCheckOptions var live = endpoints.MapHealthChecks(options.LivePath, new HealthCheckOptions
{ {
Predicate = static _ => false, Predicate = static _ => false,
}).AllowAnonymous(); }).AllowAnonymous();
return ready; return new CompositeEndpointConventionBuilder(ready, active, live);
}
/// <summary>
/// An <see cref="IEndpointConventionBuilder"/> that forwards each convention to several
/// underlying builders, so conventions chained onto the result of
/// <see cref="MapZbHealth(IEndpointRouteBuilder, ZbHealthEndpointOptions?)"/> apply to all three
/// health endpoints rather than just one.
/// </summary>
private sealed class CompositeEndpointConventionBuilder : IEndpointConventionBuilder
{
private readonly IEndpointConventionBuilder[] _builders;
/// <summary>Captures the builders that every later convention is forwarded to.</summary>
/// <param name="builders">The underlying endpoint convention builders.</param>
public CompositeEndpointConventionBuilder(params IEndpointConventionBuilder[] builders)
{
    _builders = builders;
}
/// <summary>Forwards <paramref name="convention"/> to each underlying builder, in order.</summary>
public void Add(Action<EndpointBuilder> convention)
{
    for (int i = 0; i < _builders.Length; i++)
    {
        _builders[i].Add(convention);
    }
}
/// <summary>Forwards <paramref name="finalConvention"/> to each underlying builder, in order.</summary>
/// <param name="finalConvention">The final convention to register on every wrapped builder.</param>
public void Finally(Action<EndpointBuilder> finalConvention)
{
    for (var index = 0; index < _builders.Length; index++)
    {
        _builders[index].Finally(finalConvention);
    }
}
} }
/// <summary> /// <summary>
@@ -70,7 +96,10 @@ public static class ZbHealthEndpointExtensions
/// </summary> /// </summary>
/// <param name="endpoints">The endpoint route builder to map onto.</param> /// <param name="endpoints">The endpoint route builder to map onto.</param>
/// <param name="configure">Callback that mutates a fresh <see cref="ZbHealthEndpointOptions"/>.</param> /// <param name="configure">Callback that mutates a fresh <see cref="ZbHealthEndpointOptions"/>.</param>
/// <returns>The <see cref="IEndpointConventionBuilder"/> for the readiness endpoint.</returns> /// <returns>
/// A composite <see cref="IEndpointConventionBuilder"/> that fans chained conventions out to all
/// three health endpoints.
/// </returns>
public static IEndpointConventionBuilder MapZbHealth( public static IEndpointConventionBuilder MapZbHealth(
this IEndpointRouteBuilder endpoints, this IEndpointRouteBuilder endpoints,
Action<ZbHealthEndpointOptions> configure) Action<ZbHealthEndpointOptions> configure)
@@ -1,5 +1,4 @@
using System.Text.Json; using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Diagnostics.HealthChecks;
@@ -21,15 +20,21 @@ namespace ZB.MOM.WW.Health;
/// } /// }
/// } /// }
/// </code> /// </code>
/// The HTTP status code is left to the ASP.NET Core health-checks middleware (Healthy/Degraded → 200, /// The <c>description</c> key is always present; when a check supplies no description it is emitted
/// Unhealthy → 503); this writer only renders the body and sets <c>Content-Type: application/json</c>. /// as JSON <c>null</c> (not omitted), matching the spec example and the <c>HealthChecks.UI.Client</c>
/// shape. The HTTP status code is left to the ASP.NET Core health-checks middleware (Healthy/Degraded
/// → 200, Unhealthy → 503); this writer only renders the body and sets
/// <c>Content-Type: application/json</c>.
/// </remarks> /// </remarks>
public static class ZbHealthWriter public static class ZbHealthWriter
{ {
// Null properties are emitted (not omitted) so a null `description` renders as
// "description": null — matching the SPEC §3 example and the HealthChecks.UI.Client shape this
// writer mirrors. Consumers can then read entries.<name>.description without handling a missing
// property. (Do not set DefaultIgnoreCondition = WhenWritingNull here.)
private static readonly JsonSerializerOptions SerializerOptions = new() private static readonly JsonSerializerOptions SerializerOptions = new()
{ {
PropertyNamingPolicy = JsonNamingPolicy.CamelCase, PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
}; };
/// <summary> /// <summary>
@@ -86,6 +86,50 @@ public sealed class ActiveNodeDecisionTests
Assert.False(gate.IsActiveNode); Assert.False(gate.IsActiveNode);
} }
[Fact]
public async Task HealthCheck_RoleLess_ClusterInaccessible_ReturnsDegraded()
{
    // The container holds an ActorSystem on which Akka.Cluster is not configured, so Cluster.Get
    // throws. Startup-safety rule: the check must report Degraded rather than let the exception
    // escape (which would surface as Unhealthy).
    using var system = ActorSystem.Create("plain-no-cluster-roleless");
    try
    {
        var services = new ServiceCollection();
        services.AddSingleton(system);
        var check = new ActiveNodeHealthCheck(services.BuildServiceProvider());

        var outcome = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, outcome.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
[Fact]
public async Task HealthCheck_RoleFiltered_ClusterInaccessible_ReturnsDegraded()
{
    // Same scenario as the role-less case, but the check is constructed with the extra "admin"
    // argument (presumably the role filter): Degraded is still the expected status.
    using var system = ActorSystem.Create("plain-no-cluster-rolefiltered");
    try
    {
        var services = new ServiceCollection();
        services.AddSingleton(system);
        var check = new ActiveNodeHealthCheck(services.BuildServiceProvider(), "admin");

        var outcome = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, outcome.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
private static HealthCheckContext NewContext(IHealthCheck check) => new() private static HealthCheckContext NewContext(IHealthCheck check) => new()
{ {
Registration = new HealthCheckRegistration("active-node", check, HealthStatus.Unhealthy, tags: null), Registration = new HealthCheckRegistration("active-node", check, HealthStatus.Unhealthy, tags: null),
@@ -1,3 +1,4 @@
using Akka.Actor;
using Akka.Cluster; using Akka.Cluster;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Diagnostics.HealthChecks;
@@ -70,6 +71,30 @@ public sealed class AkkaClusterStatusPolicyTests
Assert.Equal(HealthStatus.Degraded, result.Status); Assert.Equal(HealthStatus.Degraded, result.Status);
} }
[Fact]
public async Task HealthCheck_ActorSystemPresentButClusterInaccessible_ReturnsDegraded()
{
    // DI contains a plain (non-clustered) ActorSystem; Akka.Cluster is not configured, so
    // Cluster.Get(system) throws a ConfigurationException — the startup race the spec calls out.
    // Expected: the check answers Degraded instead of letting the exception escape (the host would
    // otherwise report Unhealthy).
    using var system = ActorSystem.Create("plain-no-cluster");
    try
    {
        var services = new ServiceCollection();
        services.AddSingleton(system);
        var check = new AkkaClusterHealthCheck(services.BuildServiceProvider(), AkkaClusterStatusPolicy.Default);

        var outcome = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, outcome.Status);
    }
    finally
    {
        await system.Terminate();
    }
}
private static HealthCheckContext NewContext(IHealthCheck check) => new() private static HealthCheckContext NewContext(IHealthCheck check) => new()
{ {
Registration = new HealthCheckRegistration("akka-cluster", check, HealthStatus.Unhealthy, tags: null), Registration = new HealthCheckRegistration("akka-cluster", check, HealthStatus.Unhealthy, tags: null),
@@ -71,6 +71,21 @@ public sealed class GrpcDependencyHealthCheckTests
Assert.Contains("mxaccessgw worker", result.Description); Assert.Contains("mxaccessgw worker", result.Description);
} }
[Fact]
public async Task ProbeThrowsArbitraryException_Unhealthy()
{
    // The probe throws something that is neither RpcException nor OperationCanceledException
    // (e.g. the transport surfacing an InvalidOperationException). The check must catch it and map
    // it to Unhealthy rather than let it escape.
    var options = new GrpcDependencyOptions
    {
        DependencyName = "mxaccessgw worker",
        Probe = static (_, _) => throw new InvalidOperationException("channel disposed"),
    };

    var outcome = await RunAsync(options);

    Assert.Equal(HealthStatus.Unhealthy, outcome.Status);
    Assert.Contains("mxaccessgw worker", outcome.Description);
}
[Fact] [Fact]
public async Task ProbeExceedsTimeout_Unhealthy() public async Task ProbeExceedsTimeout_Unhealthy()
{ {
@@ -31,7 +31,7 @@ public sealed class ResponseWriterTests
} }
private static async Task<HttpResponseMessage> GetReadyAsync( private static async Task<HttpResponseMessage> GetReadyAsync(
HealthStatus status, string description = "db reachable") HealthStatus status, string? description = "db reachable")
{ {
var builder = WebApplication.CreateBuilder(); var builder = WebApplication.CreateBuilder();
builder.WebHost.UseTestServer(); builder.WebHost.UseTestServer();
@@ -66,6 +66,24 @@ public sealed class ResponseWriterTests
Assert.Equal("db reachable", db.GetProperty("description").GetString()); Assert.Equal("db reachable", db.GetProperty("description").GetString());
} }
[Fact]
public async Task ReadyEndpoint_NullDescription_EmitsDescriptionKeyAsNull()
{
    // When a check yields no description, the payload must still carry the "description" key with
    // a JSON null value (spec §3 example / HealthChecks.UI.Client shape) instead of dropping the
    // key, so consumers can read entries.<name>.description without handling a missing property.
    var response = await GetReadyAsync(HealthStatus.Healthy, description: null);
    Assert.Equal(HttpStatusCode.OK, response.StatusCode);

    var body = await response.Content.ReadAsStringAsync();
    using var doc = JsonDocument.Parse(body);
    var db = doc.RootElement.GetProperty("entries").GetProperty("db");

    var hasDescription = db.TryGetProperty("description", out var description);
    Assert.True(hasDescription, "description key must be present");
    Assert.Equal(JsonValueKind.Null, description.ValueKind);
}
[Fact] [Fact]
public async Task ReadyEndpoint_Degraded_Returns200_WithDegradedStatus() public async Task ReadyEndpoint_Degraded_Returns200_WithDegradedStatus()
{ {
@@ -127,6 +127,31 @@ public sealed class TierMappingTests
Assert.Equal(0, active.Invocations); Assert.Equal(0, active.Invocations);
} }
[Fact]
public async Task ChainedConvention_AppliesToAllThreeEndpoints()
{
    // MapZbHealth hands back a composite builder, so a convention chained onto it (.RequireHost)
    // has to gate all three endpoints, not only readiness. The host filter below does not match
    // the default test-client host, so each tier is expected to answer 404.
    var builder = WebApplication.CreateBuilder();
    builder.WebHost.UseTestServer();
    var healthChecks = builder.Services.AddHealthChecks();
    healthChecks.AddCheck("ready-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Ready });
    healthChecks.AddCheck("active-check", new RecordingHealthCheck(HealthStatus.Healthy), tags: new[] { ZbHealthTags.Active });

    await using var app = builder.Build();
    app.MapZbHealth().RequireHost("health.internal");
    await app.StartAsync();
    var client = app.GetTestClient();

    // A 404 on every path confirms the convention fanned out to all three endpoints.
    foreach (var path in new[] { "/health/ready", "/health/active", "/healthz" })
    {
        var response = await client.GetAsync(path);
        Assert.Equal(HttpStatusCode.NotFound, response.StatusCode);
    }
}
[Fact] [Fact]
public async Task Options_OverrideRoutePaths() public async Task Options_OverrideRoutePaths()
{ {
+13 -10
View File
@@ -4,7 +4,7 @@ Observability libraries for the **ZB.MOM.WW SCADA family** (OtOpcUa, MxAccessGat
The library normalizes the three-project observability surface: a shared OpenTelemetry Resource driven by a single identity triple (`service.name` / `site.id` / `node.role`), standard instrumentation wiring, Prometheus and OTLP export, and a Serilog bootstrap with enrichers and `TraceContextEnricher` for trace↔log correlation. The library normalizes the three-project observability surface: a shared OpenTelemetry Resource driven by a single identity triple (`service.name` / `site.id` / `node.role`), standard instrumentation wiring, Prometheus and OTLP export, and a Serilog bootstrap with enrichers and `TraceContextEnricher` for trace↔log correlation.
**Built at 0.1.0. MxAccessGateway logging adopted (MEL → Serilog migration done on its own branch). OtOpcUa and ScadaBridge telemetry adoption is follow-on.** Adoption tracked in `~/Desktop/scadaproj/components/observability/GAPS.md`. **Built at 0.1.0, published to the Gitea NuGet feed, and adopted across all three apps on 2026-06-01** (branch `feat/adopt-zb-telemetry` per repo, behaviour-preserving). MxAccessGateway's MEL→Serilog migration + metrics export both landed in this pass — they were *not* actually done beforehand despite the earlier claim. ScadaBridge keeps its `LoggerConfigurationFactory` (min-level governance) and only adds the shared `TraceContextEnricher`; it does not call `AddZbSerilog`. Per-repo result + deferred follow-ons tracked in `~/Desktop/scadaproj/components/observability/GAPS.md`.
--- ---
@@ -21,12 +21,13 @@ The library normalizes the three-project observability surface: a shared OpenTel
| Consumer | `ZB.MOM.WW.Telemetry` (core) | `ZB.MOM.WW.Telemetry.Serilog` | | Consumer | `ZB.MOM.WW.Telemetry` (core) | `ZB.MOM.WW.Telemetry.Serilog` |
|---|:---:|:---:| |---|:---:|:---:|
| **OtOpcUa** | yes (after adoption) | yes (after adoption) | | **OtOpcUa** | ✅ adopted | ✅ adopted (`AddZbSerilog`) |
| **MxAccessGateway** | yes (after adoption) | yes (MEL → Serilog adopted now) | | **MxAccessGateway** | ✅ adopted (`GatewayMetrics` exported) | ✅ adopted (MEL→Serilog migrated in this pass) |
| **ScadaBridge** | yes (after adoption) | yes (after adoption) | | **ScadaBridge** | ✅ adopted (both roots) | ⚠️ referenced for `TraceContextEnricher` only — keeps `LoggerConfigurationFactory`, does **not** call `AddZbSerilog` |
MxAccessGateway's logging adoption is the one in-pass migration. Full metrics/tracing wiring All three adopted on 2026-06-01 (branch `feat/adopt-zb-telemetry` per repo). ScadaBridge's logging
for all three apps is follow-on. deviates: it keeps its own `LoggerConfigurationFactory` (min-level governance contract) and only
adds the shared `TraceContextEnricher`. See `components/observability/GAPS.md` for the full result.
--- ---
@@ -60,11 +61,13 @@ All test assemblies run offline:
## Status ## Status
Built at **0.1.0** and published to the Gitea NuGet feed. MxAccessGateway logging (MEL → Serilog) Built at **0.1.0**, published to the Gitea NuGet feed, and **adopted across all three apps on
adopted on its own branch. **OtOpcUa and ScadaBridge telemetry adoption not yet started** 2026-06-01** (branch `feat/adopt-zb-telemetry` per repo, behaviour-preserving). MxAccessGateway's
tracked in the component backlog: MEL→Serilog migration and metrics export both landed in this pass (not beforehand, despite the
earlier claim). Deferred follow-ons (MxGateway `ms`→`s` + Meter rename, ScadaBridge app instruments
+ Site-node HTTP/1.1 metrics listener, OTLP wiring) are tracked in the component backlog:
- `~/Desktop/scadaproj/components/observability/GAPS.md` — adoption order, effort, and risk - `~/Desktop/scadaproj/components/observability/GAPS.md` — adoption status + deferred follow-ons
Design documentation: Design documentation:
+17 -3
View File
@@ -55,6 +55,20 @@ Trace↔log correlation is automatic: `TraceContextEnricher` reads `Activity.Cur
log event and attaches `trace_id` and `span_id`, so log events produced inside a traced request log event and attaches `trace_id` and `span_id`, so log events produced inside a traced request
carry the same span identity as the trace backend. carry the same span identity as the trace backend.
**Redaction reach.** A registered `ILogRedactor` may **remove** or **replace** any value, and
`RedactionEnricher` honours both (a removed key is dropped from the event). Scalar properties appear
as their unwrapped CLR value; **destructured** properties are projected into mutable views the
redactor can descend into — a `{@Object}` is an `IDictionary<string, object?>` of its fields, a
logged collection an `IList<object?>`, a logged dictionary an `IDictionary<string, object?>` — all
recursively, so a field **nested inside** a destructured object can be masked or removed:
```csharp
if (properties["command"] is IDictionary<string, object?> command) command["apiKey"] = "***";
```
Structure type tags and dictionary keys are preserved on rebuild, and untouched properties are left
intact (not reallocated). See the `ILogRedactor` XML doc for the full contract.
--- ---
## Exporter options ## Exporter options
@@ -113,9 +127,9 @@ backend):
| Assembly | Tests | | Assembly | Tests |
|---|---| |---|---|
| `ZB.MOM.WW.Telemetry.Tests` | 7 | | `ZB.MOM.WW.Telemetry.Tests` | 12 |
| `ZB.MOM.WW.Telemetry.Serilog.Tests` | 12 | | `ZB.MOM.WW.Telemetry.Serilog.Tests` | 17 |
| **Total** | **19** | | **Total** | **29** |
--- ---
@@ -11,6 +11,20 @@ public interface ILogRedactor
/// <summary> /// <summary>
/// Inspects and mutates the supplied log-event <paramref name="properties"/> in place — remove /// Inspects and mutates the supplied log-event <paramref name="properties"/> in place — remove
/// or replace any sensitive values. Called on every log event before it reaches any sink. /// or replace any sensitive values. Called on every log event before it reaches any sink.
/// Both removing a key (the property is dropped from the event) and replacing its value are
/// honoured by <see cref="RedactionEnricher"/>.
/// <para>
/// <b>Reach — top-level and nested.</b> A scalar property (e.g. <c>{apiKey}</c>) appears as its
/// unwrapped CLR value, which you can read and replace directly. A <b>destructured</b> property is
/// projected into a mutable view you can descend into: a <c>{@Object}</c> arrives as an
/// <c>IDictionary&lt;string, object?&gt;</c> of its fields, a logged collection as an
/// <c>IList&lt;object?&gt;</c>, and a logged dictionary as an <c>IDictionary&lt;string, object?&gt;</c>
/// keyed by the string form of each key — all recursively. You can therefore mask or remove a field
/// nested inside a destructured object, for example:
/// <code>if (properties["command"] is IDictionary&lt;string, object?&gt; command) command["apiKey"] = "***";</code>
/// The structure's type tag and a dictionary's original keys are preserved when the event is
/// rebuilt, and properties you do not touch are left intact.
/// </para>
/// </summary> /// </summary>
/// <param name="properties">The mutable property dictionary for the current log event.</param> /// <param name="properties">The mutable property dictionary for the current log event.</param>
void Redact(IDictionary<string, object?> properties); void Redact(IDictionary<string, object?> properties);
@@ -30,53 +30,309 @@ public sealed class RedactionEnricher : ILogEventEnricher
} }
/// <summary> /// <summary>
/// Hands the log event's scalar properties to the registered <see cref="ILogRedactor"/> and /// Hands the log event's properties to the registered <see cref="ILogRedactor"/> and reconciles
/// writes back any values the redactor changed. No-op when no redactor is registered. /// the result back onto the event. No-op when no redactor is registered or the event carries no
/// properties.
/// <para>
/// Each property is projected into a mutable view the redactor can edit: a scalar is its
/// unwrapped value, and a structured value (<see cref="StructureValue"/> from <c>{@Object}</c>,
/// <see cref="SequenceValue"/>, <see cref="DictionaryValue"/>) becomes a nested
/// <see cref="IDictionary{TKey,TValue}"/> / <see cref="IList{T}"/> the redactor can descend into —
/// recursively — so a field nested inside a destructured object can be masked or removed. After
/// redaction each property is rebuilt and written back only when it actually changed; the
/// structure's type tag and a dictionary's original keys are preserved on rebuild, and keys the
/// redactor removed are deleted via <c>RemovePropertyIfPresent</c>. Properties the redactor does
/// not touch are left intact and are not reallocated.
/// </para>
/// </summary> /// </summary>
/// <param name="logEvent">The log event to redact.</param> /// <param name="logEvent">The log event to redact.</param>
/// <param name="propertyFactory">Factory used to materialize replacement properties.</param> /// <param name="propertyFactory">Unused; structured values are rebuilt directly into Serilog values.</param>
public void Enrich(LogEvent logEvent, ILogEventPropertyFactory propertyFactory) public void Enrich(LogEvent logEvent, ILogEventPropertyFactory propertyFactory)
{ {
ArgumentNullException.ThrowIfNull(logEvent); ArgumentNullException.ThrowIfNull(logEvent);
ArgumentNullException.ThrowIfNull(propertyFactory); ArgumentNullException.ThrowIfNull(propertyFactory);
var redactor = ResolveRedactor(); var redactor = _redactor.Value;
if (redactor is null) if (redactor is null)
{ {
return; return;
} }
var snapshot = new Dictionary<string, object?>(logEvent.Properties.Count); // Hot path: an event with no properties has nothing to redact — skip the snapshot copy.
if (logEvent.Properties.Count == 0)
{
return;
}
// Project every property into a mutable view. Scalars stay as their CLR value (zero extra
// allocation); structured values become nested dictionaries/lists carrying enough metadata
// (node kind, type tag, original dictionary keys) to be rebuilt faithfully.
var snapshot = new Dictionary<string, object?>(logEvent.Properties.Count, StringComparer.Ordinal);
foreach (var property in logEvent.Properties) foreach (var property in logEvent.Properties)
{ {
snapshot[property.Key] = property.Value is ScalarValue scalar snapshot[property.Key] = Project(property.Value);
? scalar.Value
: property.Value;
} }
// Capture the original key set so we can honour deletions: any key the redactor drops from
// the snapshot must be removed from the event (not silently retained).
var originalKeys = new HashSet<string>(snapshot.Keys, StringComparer.Ordinal);
redactor.Redact(snapshot); redactor.Redact(snapshot);
foreach (var entry in snapshot) foreach (var entry in snapshot)
{ {
if (HasChanged(logEvent, entry.Key, entry.Value)) // Rebuild the (possibly redacted) value and write it back only when it differs from what
// the event already holds, so an untouched property is never needlessly reallocated.
var rebuilt = Rebuild(entry.Value);
if (!logEvent.Properties.TryGetValue(entry.Key, out var existing) || !ValueEquals(existing, rebuilt))
{ {
logEvent.AddOrUpdateProperty( logEvent.AddOrUpdateProperty(new LogEventProperty(entry.Key, rebuilt));
propertyFactory.CreateProperty(entry.Key, entry.Value)); }
}
// Reconcile removals: a redactor that deleted a key from the snapshot (e.g.
// properties.Remove("apiKey")) means that property must not reach any sink.
foreach (var key in originalKeys)
{
if (!snapshot.ContainsKey(key))
{
logEvent.RemovePropertyIfPresent(key);
} }
} }
} }
private ILogRedactor? ResolveRedactor() => _redactor.Value; /// <summary>
/// Projects an immutable Serilog value into a mutable view the redactor can edit. Scalars unwrap
private static bool HasChanged(LogEvent logEvent, string key, object? newValue) /// to their CLR value; structures/sequences/dictionaries become nested mutable wrappers that
/// remember their kind (and a structure's type tag / a dictionary's original keys) for rebuild.
/// </summary>
private static object? Project(LogEventPropertyValue value)
{ {
if (!logEvent.Properties.TryGetValue(key, out var existing)) switch (value)
{ {
// Redactor added a brand-new property. case ScalarValue scalar:
return true; return scalar.Value;
case StructureValue structure:
var projectedStructure = new ProjectedStructure(structure.TypeTag, structure.Properties.Count);
foreach (var property in structure.Properties)
{
projectedStructure[property.Name] = Project(property.Value);
}
return projectedStructure;
case SequenceValue sequence:
var projectedSequence = new ProjectedSequence(sequence.Elements.Count);
foreach (var element in sequence.Elements)
{
projectedSequence.Add(Project(element));
}
return projectedSequence;
case DictionaryValue dictionary:
var projectedDictionary = new ProjectedDictionary(dictionary.Elements.Count);
foreach (var pair in dictionary.Elements)
{
var key = pair.Key.Value?.ToString() ?? NullKey;
projectedDictionary[key] = Project(pair.Value);
projectedDictionary.OriginalKeys[key] = pair.Key;
}
return projectedDictionary;
default:
// Unknown future LogEventPropertyValue subtype — pass the wrapper through untouched.
return value;
}
}
/// <summary>
/// Converts a projected (and possibly redacted) value back into an immutable Serilog value —
/// the inverse of <see cref="Project"/>. Besides the projection wrappers it also accepts plain
/// dictionaries/lists (rebuilt as a structure / sequence), raw <see cref="LogEventPropertyValue"/>
/// instances (returned as-is), and any other CLR value (wrapped in a <see cref="ScalarValue"/>).
/// </summary>
/// <param name="projected">The mutable view, or replacement value, to convert.</param>
/// <returns>The Serilog value representing <paramref name="projected"/>.</returns>
private static LogEventPropertyValue Rebuild(object? projected)
{
    // Test order matters: the Projected* wrappers derive from Dictionary/List, so they must be
    // checked before the general IDictionary/IList cases.
    if (projected is ProjectedStructure structure)
    {
        return new StructureValue(RebuildProperties(structure), structure.TypeTag);
    }

    if (projected is ProjectedDictionary dictionary)
    {
        var rebuiltPairs = new List<KeyValuePair<ScalarValue, LogEventPropertyValue>>(dictionary.Count);
        foreach (var (name, value) in dictionary)
        {
            // Reuse the original ScalarValue key when one was recorded; otherwise (a key the
            // redactor added) wrap the string key in a new ScalarValue.
            var rebuiltKey = dictionary.OriginalKeys.TryGetValue(name, out var originalKey)
                ? originalKey
                : new ScalarValue(name);
            rebuiltPairs.Add(new KeyValuePair<ScalarValue, LogEventPropertyValue>(rebuiltKey, Rebuild(value)));
        }

        return new DictionaryValue(rebuiltPairs);
    }

    if (projected is ProjectedSequence sequence)
    {
        return new SequenceValue(RebuildElements(sequence));
    }

    if (projected is IDictionary<string, object?> injected)
    {
        // A plain dictionary synthesised by the redactor becomes a StructureValue (no type tag).
        return new StructureValue(RebuildProperties(injected));
    }

    if (projected is IList<object?> injectedList)
    {
        return new SequenceValue(RebuildElements(injectedList));
    }

    if (projected is LogEventPropertyValue raw)
    {
        // Either an unknown subtype that Project passed through, or a value the redactor set directly.
        return raw;
    }

    return new ScalarValue(projected);
}
private static List<LogEventProperty> RebuildProperties(IDictionary<string, object?> source)
{
var properties = new List<LogEventProperty>(source.Count);
foreach (var entry in source)
{
properties.Add(new LogEventProperty(entry.Key, Rebuild(entry.Value)));
} }
var existingValue = existing is ScalarValue scalar ? scalar.Value : existing; return properties;
return !Equals(existingValue, newValue); }
/// <summary>Rebuilds every element of <paramref name="source"/>, preserving order.</summary>
private static List<LogEventPropertyValue> RebuildElements(IList<object?> source)
{
    var rebuilt = new List<LogEventPropertyValue>(source.Count);
    using var cursor = source.GetEnumerator();
    while (cursor.MoveNext())
    {
        rebuilt.Add(Rebuild(cursor.Current));
    }

    return rebuilt;
}
/// <summary>
/// Deep equality between two Serilog values, used to avoid writing back a property whose rebuilt
/// form matches what the event already holds. Scalars compare by wrapped value; structures by
/// type tag plus per-name property values; sequences element-by-element in order; dictionaries
/// by per-key values — each recursively. Any other pairing (mismatched or unknown kinds) is
/// equal only when both arguments are the same instance.
/// </summary>
private static bool ValueEquals(LogEventPropertyValue left, LogEventPropertyValue right)
{
    switch (left, right)
    {
        case (ScalarValue leftScalar, ScalarValue rightScalar):
        {
            return Equals(leftScalar.Value, rightScalar.Value);
        }

        case (StructureValue leftStructure, StructureValue rightStructure):
        {
            if (leftStructure.TypeTag != rightStructure.TypeTag)
            {
                return false;
            }

            if (leftStructure.Properties.Count != rightStructure.Properties.Count)
            {
                return false;
            }

            foreach (var candidate in leftStructure.Properties)
            {
                var counterpart = FindProperty(rightStructure, candidate.Name);
                if (counterpart is null || !ValueEquals(candidate.Value, counterpart.Value))
                {
                    return false;
                }
            }

            return true;
        }

        case (SequenceValue leftSequence, SequenceValue rightSequence):
        {
            var length = leftSequence.Elements.Count;
            if (length != rightSequence.Elements.Count)
            {
                return false;
            }

            for (var index = 0; index < length; index++)
            {
                if (!ValueEquals(leftSequence.Elements[index], rightSequence.Elements[index]))
                {
                    return false;
                }
            }

            return true;
        }

        case (DictionaryValue leftDictionary, DictionaryValue rightDictionary):
        {
            if (leftDictionary.Elements.Count != rightDictionary.Elements.Count)
            {
                return false;
            }

            foreach (var (dictionaryKey, leftValue) in leftDictionary.Elements)
            {
                var rightValue = FindDictionaryValue(rightDictionary, dictionaryKey);
                if (rightValue is null || !ValueEquals(leftValue, rightValue))
                {
                    return false;
                }
            }

            return true;
        }

        default:
        {
            return ReferenceEquals(left, right);
        }
    }
}
/// <summary>
/// Returns the first property of <paramref name="structure"/> whose name equals
/// <paramref name="name"/>, or <c>null</c> when none does.
/// </summary>
private static LogEventProperty? FindProperty(StructureValue structure, string name)
{
    var candidates = structure.Properties;
    for (var index = 0; index < candidates.Count; index++)
    {
        var candidate = candidates[index];
        if (string.Equals(candidate.Name, name, StringComparison.Ordinal))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Looks up the value stored under <paramref name="key"/> in <paramref name="dictionary"/>.
/// </summary>
/// <param name="dictionary">The Serilog dictionary value to search.</param>
/// <param name="key">The key to find; matched by its wrapped value.</param>
/// <returns>The matching value, or <c>null</c> when the key is absent.</returns>
private static LogEventPropertyValue? FindDictionaryValue(DictionaryValue dictionary, ScalarValue key)
{
    // Per Serilog's API, Elements is a hashed dictionary keyed by ScalarValue, and ScalarValue
    // equality compares the wrapped values — the same match rule as a manual scan. A direct
    // lookup keeps the dictionary comparison in ValueEquals linear instead of quadratic.
    return dictionary.Elements.TryGetValue(key, out var value) ? value : null;
}
private const string NullKey = "";
/// <summary>
/// Mutable dictionary view of a destructured object. Keys are compared ordinally and the
/// structure's type tag is carried along so it can be reapplied on rebuild.
/// </summary>
private sealed class ProjectedStructure : Dictionary<string, object?>
{
    public ProjectedStructure(string? typeTag, int capacity)
        : base(capacity, StringComparer.Ordinal)
    {
        TypeTag = typeTag;
    }

    /// <summary>The type tag of the structure this view was projected from; may be <c>null</c>.</summary>
    public string? TypeTag { get; }
}
/// <summary>
/// Mutable dictionary view of a logged dictionary. Keys are compared ordinally; the original
/// <see cref="ScalarValue"/> keys are kept alongside so they can be reused on rebuild.
/// </summary>
private sealed class ProjectedDictionary : Dictionary<string, object?>
{
    public ProjectedDictionary(int capacity)
        : base(capacity, StringComparer.Ordinal)
    {
        OriginalKeys = new Dictionary<string, ScalarValue>(capacity, StringComparer.Ordinal);
    }

    /// <summary>Maps each projected string key to the original Serilog key it came from.</summary>
    public Dictionary<string, ScalarValue> OriginalKeys { get; }
}
/// <summary>A logged collection projected to a mutable list.</summary>
private sealed class ProjectedSequence : List<object?>
{
public ProjectedSequence(int capacity) : base(capacity)
{
}
} }
} }
@@ -115,38 +115,13 @@ internal static class ZbSerilogConfig
} }
/// <summary> /// <summary>
/// Builds the OTLP Resource-attribute map mirroring <c>ZbResource</c>. Null/empty optional /// Builds the OTLP log-sink Resource-attribute map. This is <em>not</em> a parallel
/// attributes are omitted, matching the shared Resource's omission rules. The /// implementation: it is derived directly from <see cref="ZbResource.BuildAttributes"/> — the
/// <c>service.instance.id</c> is sourced from <see cref="ZbResource.InstanceId"/> — the /// single source of truth shared with the OTel SDK metrics/traces pipeline — so the log sink can
/// same deterministic <c>MachineName:ProcessId</c> value used by the OTel SDK path — so /// never drift from metrics and traces. Returned as a fresh mutable copy because the
/// all three signals carry an identical instance identifier. Internal so it can be asserted /// Serilog OpenTelemetry sink takes ownership of the dictionary it is handed. Internal so it can
/// by the test assembly without being part of the public NuGet API. /// be asserted by the test assembly without being part of the public NuGet API.
/// </summary> /// </summary>
internal static IDictionary<string, object> BuildResourceAttributes(ZbTelemetryOptions options) internal static IDictionary<string, object> BuildResourceAttributes(ZbTelemetryOptions options) =>
{ new Dictionary<string, object>(ZbResource.BuildAttributes(options), StringComparer.Ordinal);
var attributes = new Dictionary<string, object>
{
["service.name"] = options.ServiceName,
["service.namespace"] = options.ServiceNamespace,
["service.instance.id"] = ZbResource.InstanceId,
["host.name"] = Environment.MachineName,
};
if (!string.IsNullOrEmpty(options.ServiceVersion))
{
attributes["service.version"] = options.ServiceVersion;
}
if (!string.IsNullOrEmpty(options.SiteId))
{
attributes["site.id"] = options.SiteId;
}
if (!string.IsNullOrEmpty(options.NodeRole))
{
attributes["node.role"] = options.NodeRole;
}
return attributes;
}
} }
@@ -65,6 +65,10 @@ public static class ZbSerilogExtensions
var options = new ZbTelemetryOptions(); var options = new ZbTelemetryOptions();
configure(options); configure(options);
// Fail fast on a malformed OTLP endpoint with a clear, named message — same validation the
// core AddZbTelemetry path uses — instead of a late error when the OTel log sink builds.
ZbTelemetryOptionsValidator.Validate(options, nameof(configure));
// Register the application logger in DI only. preserveStaticLogger: true ensures // Register the application logger in DI only. preserveStaticLogger: true ensures
// AddSerilog does NOT freeze or replace Log.Logger — critical for multi-host // AddSerilog does NOT freeze or replace Log.Logger — critical for multi-host
// processes (integration tests etc.) where AddZbSerilog may be called more than once. // processes (integration tests etc.) where AddZbSerilog may be called more than once.
@@ -20,6 +20,17 @@
<FrameworkReference Include="Microsoft.AspNetCore.App" /> <FrameworkReference Include="Microsoft.AspNetCore.App" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<!-- The Serilog companion package reuses the internal options validator (single fail-fast
path); its tests assert it too. -->
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>ZB.MOM.WW.Telemetry.Serilog</_Parameter1>
</AssemblyAttribute>
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>ZB.MOM.WW.Telemetry.Serilog.Tests</_Parameter1>
</AssemblyAttribute>
</ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="OpenTelemetry.Extensions.Hosting" /> <PackageReference Include="OpenTelemetry.Extensions.Hosting" />
<PackageReference Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" /> <PackageReference Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" />
@@ -9,8 +9,10 @@ namespace ZB.MOM.WW.Telemetry;
public static class ZbMetricsEndpointExtensions public static class ZbMetricsEndpointExtensions
{ {
/// <summary> /// <summary>
/// Mounts the Prometheus <c>/metrics</c> endpoint. Only valid when /// Mounts the Prometheus <c>/metrics</c> endpoint. Valid under <em>any</em>
/// <see cref="ZbTelemetryOptions.Exporter"/> = <see cref="ZbExporter.Prometheus"/>. /// <see cref="ZbTelemetryOptions.Exporter"/> value: the Prometheus exporter is always wired by
/// <c>AddZbTelemetry</c>, and OTLP (<see cref="ZbExporter.Otlp"/>) is only an additive overlay —
/// so <c>/metrics</c> serves scrape data even when <c>Exporter = ZbExporter.Otlp</c>.
/// Call after <c>app.UseRouting()</c>. /// Call after <c>app.UseRouting()</c>.
/// </summary> /// </summary>
/// <param name="endpoints">The endpoint route builder.</param> /// <param name="endpoints">The endpoint route builder.</param>
@@ -31,34 +31,55 @@ public static class ZbResource
Configure(ResourceBuilder.CreateDefault(), options); Configure(ResourceBuilder.CreateDefault(), options);
/// <summary> /// <summary>
/// Applies the shared ZB.MOM.WW Resource attributes to an existing <see cref="ResourceBuilder"/>. /// The single source of truth for the shared ZB.MOM.WW Resource attribute set. Every consumer
/// Internal seam so the <c>AddZbTelemetry</c> pipeline produces a Resource identical to /// of the Resource — the OTel SDK metrics/traces pipeline (<see cref="Configure"/>) and the
/// <see cref="Build"/>. /// Serilog OTLP log sink — derives its attributes from this one map, so logs can never drift
/// from metrics/traces. Required attributes (<c>service.name</c>, <c>service.namespace</c>,
/// <c>service.instance.id</c>, <c>host.name</c>) are always present; optional ones
/// (<c>service.version</c>, <c>site.id</c>, <c>node.role</c>) are included only when the
/// corresponding option is non-null/non-empty, matching the Resource's omission rules.
/// </summary> /// </summary>
internal static ResourceBuilder Configure(ResourceBuilder builder, ZbTelemetryOptions options) /// <param name="options">The telemetry options describing the service identity.</param>
/// <returns>The canonical attribute map carried by all three signals.</returns>
public static IReadOnlyDictionary<string, object> BuildAttributes(ZbTelemetryOptions options)
{ {
builder.AddService( ArgumentNullException.ThrowIfNull(options);
serviceName: options.ServiceName,
serviceNamespace: options.ServiceNamespace,
serviceVersion: options.ServiceVersion,
autoGenerateServiceInstanceId: false,
serviceInstanceId: InstanceId);
var attributes = new List<KeyValuePair<string, object>> var attributes = new Dictionary<string, object>(StringComparer.Ordinal)
{ {
new("host.name", System.Environment.MachineName), ["service.name"] = options.ServiceName,
["service.namespace"] = options.ServiceNamespace,
["service.instance.id"] = InstanceId,
["host.name"] = System.Environment.MachineName,
}; };
if (!string.IsNullOrEmpty(options.ServiceVersion))
{
attributes["service.version"] = options.ServiceVersion;
}
if (!string.IsNullOrEmpty(options.SiteId)) if (!string.IsNullOrEmpty(options.SiteId))
{ {
attributes.Add(new("site.id", options.SiteId)); attributes["site.id"] = options.SiteId;
} }
if (!string.IsNullOrEmpty(options.NodeRole)) if (!string.IsNullOrEmpty(options.NodeRole))
{ {
attributes.Add(new("node.role", options.NodeRole)); attributes["node.role"] = options.NodeRole;
} }
return attributes;
}
/// <summary>
/// Applies the shared ZB.MOM.WW Resource attributes to an existing <see cref="ResourceBuilder"/>.
/// Internal seam so the <c>AddZbTelemetry</c> pipeline produces a Resource identical to
/// <see cref="Build"/>. Derives every attribute from <see cref="BuildAttributes"/> — the same
/// canonical map the Serilog OTLP log sink uses — so all three signals agree.
/// </summary>
internal static ResourceBuilder Configure(ResourceBuilder builder, ZbTelemetryOptions options)
{
var attributes = BuildAttributes(options);
builder.AddAttributes(attributes); builder.AddAttributes(attributes);
return builder; return builder;
} }
@@ -101,6 +101,7 @@ public static class ZbTelemetryExtensions
"ZbTelemetryOptions.ServiceName is required (e.g. \"otopcua\").", "ZbTelemetryOptions.ServiceName is required (e.g. \"otopcua\").",
nameof(configure)); nameof(configure));
} }
ZbTelemetryOptionsValidator.Validate(options, nameof(configure));
return options; return options;
} }
@@ -0,0 +1,44 @@
namespace ZB.MOM.WW.Telemetry;
/// <summary>
/// Eager, fail-fast validation of <see cref="ZbTelemetryOptions"/> shared by the core
/// <c>AddZbTelemetry</c> path and the Serilog <c>AddZbSerilog</c> path, so a malformed value is
/// reported once, clearly, and with the offending option named — rather than surfacing late as a
/// bare <see cref="UriFormatException"/> deep inside exporter construction at host-build time.
/// </summary>
internal static class ZbTelemetryOptionsValidator
{
    /// <summary>
    /// Validates the OTLP configuration. When <see cref="ZbTelemetryOptions.Exporter"/> is
    /// <see cref="ZbExporter.Otlp"/>, <see cref="ZbTelemetryOptions.OtlpEndpoint"/> must be a
    /// non-empty, well-formed absolute URI using the <c>http</c> or <c>https</c> scheme.
    /// No-op for any other exporter — a stray endpoint is ignored there.
    /// </summary>
    /// <param name="options">The populated telemetry options to validate.</param>
    /// <param name="paramName">The originating parameter name for the thrown exception.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// The exporter is OTLP and the endpoint is missing, is not an absolute URI, or does not use
    /// the <c>http</c>/<c>https</c> scheme. The message names the offending option.
    /// </exception>
    public static void Validate(ZbTelemetryOptions options, string paramName)
    {
        ArgumentNullException.ThrowIfNull(options);

        if (options.Exporter != ZbExporter.Otlp)
        {
            return;
        }

        if (string.IsNullOrWhiteSpace(options.OtlpEndpoint))
        {
            throw new ArgumentException(
                "ZbTelemetryOptions.OtlpEndpoint is required when Exporter = ZbExporter.Otlp " +
                "(e.g. \"http://collector:4317\").",
                paramName);
        }

        if (!Uri.TryCreate(options.OtlpEndpoint, UriKind.Absolute, out var endpoint))
        {
            throw new ArgumentException(
                $"ZbTelemetryOptions.OtlpEndpoint is not a well-formed absolute URI: " +
                $"'{options.OtlpEndpoint}' (expected e.g. \"http://collector:4317\").",
                paramName);
        }

        // An absolute-URI check alone is too permissive: a scheme-less "collector:4317" parses as
        // an absolute URI whose scheme is "collector", and on Unix a rooted path parses as an
        // implicit file: URI. Both would pass here and only fail later in the exporter, so the
        // scheme is pinned to http/https. Uri normalizes Scheme to lower case, so an ordinal
        // comparison is sufficient.
        var isHttp = string.Equals(endpoint.Scheme, Uri.UriSchemeHttp, StringComparison.Ordinal);
        var isHttps = string.Equals(endpoint.Scheme, Uri.UriSchemeHttps, StringComparison.Ordinal);
        if (!isHttp && !isHttps)
        {
            throw new ArgumentException(
                $"ZbTelemetryOptions.OtlpEndpoint must use the http or https scheme: " +
                $"'{options.OtlpEndpoint}' (expected e.g. \"http://collector:4317\").",
                paramName);
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More