From 3d78033ea4f834a588275012f1abe7cb4e63b199 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 20 Apr 2026 22:49:25 -0400 Subject: [PATCH 1/2] =?UTF-8?q?Driver-instance=20bootstrap=20pipeline=20(#?= =?UTF-8?q?248)=20=E2=80=94=20DriverInstance=20rows=20materialise=20as=20l?= =?UTF-8?q?ive=20IDriver=20instances?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the gap surfaced by Phase 7 live smoke (#240): DriverInstance rows in the central config DB had no path to materialise as live IDriver instances in DriverHost, so virtual-tag scripts read BadNodeIdUnknown for every tag. ## DriverFactoryRegistry (Core.Hosting) Process-singleton type-name → factory map. Each driver project's static Register call pre-loads its factory at Program.cs startup; the bootstrapper looks up by DriverInstance.DriverType + invokes with (DriverInstanceId, DriverConfig JSON). Case-insensitive; duplicate-type registration throws. ## GalaxyProxyDriverFactoryExtensions.Register (Driver.Galaxy.Proxy) Static helper — no Microsoft.Extensions.DependencyInjection dep, keeps the driver project free of DI machinery. Parses DriverConfig JSON for PipeName + SharedSecret + ConnectTimeoutMs. DriverInstanceId from the row wins over JSON per the schema's UX_DriverInstance_Generation_LogicalId. ## DriverInstanceBootstrapper (Server) After NodeBootstrap loads the published generation: queries the Enabled DriverInstance rows scoped to that generation (disabled rows are never materialised), looks up the factory per row, constructs + DriverHost.RegisterAsync (which calls InitializeAsync). Per plan decision #12 (driver isolation), failure of one driver doesn't prevent others — logs ERR + continues + returns the count actually registered. Unknown DriverType (factory not registered) logs WRN + skips so a missing-assembly deployment doesn't take down the whole server. 
## Wired into OpcUaServerService.ExecuteAsync After NodeBootstrap.LoadCurrentGenerationAsync, before PopulateEquipmentContentAsync + Phase7Composer.PrepareAsync. The Phase 7 chain now sees a populated DriverHost so CachedTagUpstreamSource has an upstream feed. ## Live evidence on the dev box Re-ran the Phase 7 smoke from task #240. Pre-#248 vs post-#248: Equipment namespace snapshots loaded for 0/0 driver(s) ← before Equipment namespace snapshots loaded for 1/1 driver(s) ← after Galaxy.Host pipe ACL denied our SID (env-config issue documented in docs/ServiceHosting.md, NOT a code issue) — the bootstrapper logged it as "failed to initialize — driver state will reflect Faulted; operator can republish to retry" and continued past the failure exactly per plan #12. The rest of the pipeline (Equipment walker + Phase 7 composer) ran to completion. ## Tests — 5 new DriverFactoryRegistryTests Register + TryGet round-trip, case-insensitive lookup, duplicate-type throws, null-arg guards, RegisteredTypes snapshot. Pure functions; no DI/DB needed. The bootstrapper's DB-query path is exercised by the live smoke (#240) which operators run before each release. 
--- .../Hosting/DriverFactoryRegistry.cs | 64 +++++++++++++ .../GalaxyProxyDriverFactoryExtensions.cs | 59 ++++++++++++ ....MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj | 1 + .../DriverInstanceBootstrapper.cs | 88 ++++++++++++++++++ .../OpcUaServerService.cs | 8 ++ src/ZB.MOM.WW.OtOpcUa.Server/Program.cs | 13 +++ .../ZB.MOM.WW.OtOpcUa.Server.csproj | 1 + src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db | Bin 0 -> 40960 bytes .../DriverFactoryRegistryTests.cs | 73 +++++++++++++++ 9 files changed, 307 insertions(+) create mode 100644 src/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistry.cs create mode 100644 src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/GalaxyProxyDriverFactoryExtensions.cs create mode 100644 src/ZB.MOM.WW.OtOpcUa.Server/DriverInstanceBootstrapper.cs create mode 100644 src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Server.Tests/DriverFactoryRegistryTests.cs diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistry.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistry.cs new file mode 100644 index 0000000..3edbfaf --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverFactoryRegistry.cs @@ -0,0 +1,64 @@ +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Core.Hosting; + +/// +/// Process-singleton registry of factories keyed by +/// DriverInstance.DriverType string. Each driver project ships a DI +/// extension (e.g. services.AddGalaxyProxyDriverFactory()) that registers +/// its factory at startup; the bootstrapper looks up the factory by +/// DriverInstance.DriverType + invokes it with the row's +/// DriverInstanceId + DriverConfig JSON. +/// +/// +/// Closes the gap surfaced by task #240 live smoke — DriverInstance rows in +/// the central config DB had no path to materialise as registered +/// instances. The factory registry is the seam. 
+/// +public sealed class DriverFactoryRegistry +{ + private readonly Dictionary> _factories + = new(StringComparer.OrdinalIgnoreCase); + private readonly object _lock = new(); + + /// + /// Register a factory for . Throws if a factory is + /// already registered for that type — drivers are singletons by type-name in + /// this process. + /// + /// Matches DriverInstance.DriverType. + /// + /// Receives (driverInstanceId, driverConfigJson); returns a new + /// . Must NOT call + /// itself — the bootstrapper calls it via + /// so the host's per-driver retry semantics apply uniformly. + /// + public void Register(string driverType, Func factory) + { + ArgumentException.ThrowIfNullOrWhiteSpace(driverType); + ArgumentNullException.ThrowIfNull(factory); + lock (_lock) + { + if (_factories.ContainsKey(driverType)) + throw new InvalidOperationException( + $"DriverType '{driverType}' factory already registered for this process"); + _factories[driverType] = factory; + } + } + + /// + /// Try to look up the factory for . Returns null + /// if no driver assembly registered one — bootstrapper logs + skips so a + /// missing-assembly deployment doesn't take down the whole server. + /// + public Func? TryGet(string driverType) + { + ArgumentException.ThrowIfNullOrWhiteSpace(driverType); + lock (_lock) return _factories.GetValueOrDefault(driverType); + } + + public IReadOnlyCollection RegisteredTypes + { + get { lock (_lock) return [.. _factories.Keys]; } + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/GalaxyProxyDriverFactoryExtensions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/GalaxyProxyDriverFactoryExtensions.cs new file mode 100644 index 0000000..a0a9ae8 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/GalaxyProxyDriverFactoryExtensions.cs @@ -0,0 +1,59 @@ +using System.Text.Json; +using ZB.MOM.WW.OtOpcUa.Core.Hosting; + +namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy; + +/// +/// Static factory registration helper for . 
Server's +/// Program.cs calls once at startup; the bootstrapper (task #248) +/// then materialises Galaxy DriverInstance rows from the central config DB into live +/// driver instances. No dependency on Microsoft.Extensions.DependencyInjection so the +/// driver project stays free of DI machinery. +/// +public static class GalaxyProxyDriverFactoryExtensions +{ + public const string DriverTypeName = "Galaxy"; + + /// + /// Register the Galaxy driver factory in the supplied . + /// Throws if 'Galaxy' is already registered — single-instance per process. + /// + public static void Register(DriverFactoryRegistry registry) + { + ArgumentNullException.ThrowIfNull(registry); + registry.Register(DriverTypeName, CreateInstance); + } + + internal static GalaxyProxyDriver CreateInstance(string driverInstanceId, string driverConfigJson) + { + ArgumentException.ThrowIfNullOrWhiteSpace(driverInstanceId); + ArgumentException.ThrowIfNullOrWhiteSpace(driverConfigJson); + + // DriverConfig column is a JSON object that mirrors GalaxyProxyOptions. + // Required: PipeName, SharedSecret. Optional: ConnectTimeoutMs (defaults to 10s). + // The DriverInstanceId from the row wins over any value in the JSON — the row + // is the authoritative identity per the schema's UX_DriverInstance_Generation_LogicalId. + using var doc = JsonDocument.Parse(driverConfigJson); + var root = doc.RootElement; + + string pipeName = root.TryGetProperty("PipeName", out var p) && p.ValueKind == JsonValueKind.String + ? p.GetString()! + : throw new InvalidOperationException( + $"GalaxyProxyDriver config for '{driverInstanceId}' missing required PipeName"); + string sharedSecret = root.TryGetProperty("SharedSecret", out var s) && s.ValueKind == JsonValueKind.String + ? s.GetString()! 
+ : throw new InvalidOperationException( + $"GalaxyProxyDriver config for '{driverInstanceId}' missing required SharedSecret"); + var connectTimeout = root.TryGetProperty("ConnectTimeoutMs", out var t) && t.ValueKind == JsonValueKind.Number + ? TimeSpan.FromMilliseconds(t.GetInt32()) + : TimeSpan.FromSeconds(10); + + return new GalaxyProxyDriver(new GalaxyProxyOptions + { + DriverInstanceId = driverInstanceId, + PipeName = pipeName, + SharedSecret = sharedSecret, + ConnectTimeout = connectTimeout, + }); + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj b/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj index 862f2da..be33f47 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy.csproj @@ -13,6 +13,7 @@ + diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/DriverInstanceBootstrapper.cs b/src/ZB.MOM.WW.OtOpcUa.Server/DriverInstanceBootstrapper.cs new file mode 100644 index 0000000..640670f --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Server/DriverInstanceBootstrapper.cs @@ -0,0 +1,88 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using ZB.MOM.WW.OtOpcUa.Configuration; +using ZB.MOM.WW.OtOpcUa.Core.Hosting; + +namespace ZB.MOM.WW.OtOpcUa.Server; + +/// +/// Task #248 — bridges the gap surfaced by the Phase 7 live smoke (#240) where +/// DriverInstance rows in the central config DB had no path to materialise +/// as live instances in . +/// Called from OpcUaServerService.ExecuteAsync after the bootstrap loads +/// the published generation, before address-space build. 
+/// +/// +/// +/// Per row: looks up the DriverType string in +/// , calls the factory with the row's +/// DriverInstanceId + DriverConfig JSON to construct an +/// , then registers via +/// which invokes InitializeAsync +/// under the host's lifecycle semantics. +/// +/// +/// Unknown DriverType = factory not registered = log a warning and skip. +/// Per plan decision #12 (driver isolation), failure to construct or initialize +/// one driver doesn't prevent the rest from coming up — the Server keeps serving +/// the others' subtrees + the operator can fix the misconfigured row + republish +/// to retry. +/// +/// +public sealed class DriverInstanceBootstrapper( + DriverFactoryRegistry factories, + DriverHost driverHost, + IServiceScopeFactory scopeFactory, + ILogger logger) +{ + public async Task RegisterDriversFromGenerationAsync(long generationId, CancellationToken ct) + { + using var scope = scopeFactory.CreateScope(); + var db = scope.ServiceProvider.GetRequiredService(); + + var rows = await db.DriverInstances.AsNoTracking() + .Where(d => d.GenerationId == generationId && d.Enabled) + .ToListAsync(ct).ConfigureAwait(false); + + var registered = 0; + var skippedUnknownType = 0; + var failedInit = 0; + + foreach (var row in rows) + { + var factory = factories.TryGet(row.DriverType); + if (factory is null) + { + logger.LogWarning( + "DriverInstance {Id} skipped — DriverType '{Type}' has no registered factory (known: {Known})", + row.DriverInstanceId, row.DriverType, string.Join(",", factories.RegisteredTypes)); + skippedUnknownType++; + continue; + } + + try + { + var driver = factory(row.DriverInstanceId, row.DriverConfig); + await driverHost.RegisterAsync(driver, row.DriverConfig, ct).ConfigureAwait(false); + registered++; + logger.LogInformation( + "DriverInstance {Id} ({Type}) registered + initialized", row.DriverInstanceId, row.DriverType); + } + catch (Exception ex) + { + // Plan decision #12 — driver isolation. 
Log + continue so one bad row + // doesn't deny the OPC UA endpoint to the rest of the fleet. + logger.LogError(ex, + "DriverInstance {Id} ({Type}) failed to initialize — driver state will reflect Faulted; operator can republish to retry", + row.DriverInstanceId, row.DriverType); + failedInit++; + } + } + + logger.LogInformation( + "DriverInstanceBootstrapper: gen={Gen} registered={Registered} skippedUnknownType={Skipped} failedInit={Failed}", + generationId, registered, skippedUnknownType, failedInit); + return registered; + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs index 8b7705a..c988ee6 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs @@ -18,6 +18,7 @@ public sealed class OpcUaServerService( DriverHost driverHost, OpcUaApplicationHost applicationHost, DriverEquipmentContentRegistry equipmentContentRegistry, + DriverInstanceBootstrapper driverBootstrapper, Phase7Composer phase7Composer, IServiceScopeFactory scopeFactory, ILogger logger) : BackgroundService @@ -37,6 +38,13 @@ public sealed class OpcUaServerService( // address space until the first publish, then the registry fills on next restart. if (result.GenerationId is { } gen) { + // Task #248 — register IDriver instances from the published DriverInstance + // rows BEFORE the equipment-content load + Phase 7 compose, so the rest of + // the pipeline sees a populated DriverHost. Without this step Phase 7's + // CachedTagUpstreamSource has no upstream feed + virtual-tag scripts read + // BadNodeIdUnknown for every tag path (gap surfaced by task #240 smoke). 
+ await driverBootstrapper.RegisterDriversFromGenerationAsync(gen, stoppingToken); + await PopulateEquipmentContentAsync(gen, stoppingToken); // Phase 7 follow-up #246 — load Script + VirtualTag + ScriptedAlarm rows, diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs b/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs index 2cd69a4..956f317 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Server/Program.cs @@ -9,6 +9,7 @@ using ZB.MOM.WW.OtOpcUa.Configuration; using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache; using ZB.MOM.WW.OtOpcUa.Core.Hosting; using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian; +using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy; using ZB.MOM.WW.OtOpcUa.Server; using ZB.MOM.WW.OtOpcUa.Server.OpcUa; using ZB.MOM.WW.OtOpcUa.Server.Phase7; @@ -89,6 +90,18 @@ builder.Services.AddSingleton(_ => new LiteDbConfigCache(opti builder.Services.AddSingleton(); builder.Services.AddSingleton(); +// Task #248 — driver-instance bootstrap pipeline. DriverFactoryRegistry is the +// type-name → factory map; each driver project's static Register call pre-loads +// its factory so the bootstrapper can materialise DriverInstance rows from the +// central DB into live IDriver instances. +builder.Services.AddSingleton(_ => +{ + var registry = new DriverFactoryRegistry(); + GalaxyProxyDriverFactoryExtensions.Register(registry); + return registry; +}); +builder.Services.AddSingleton(); + // ADR-001 Option A wiring — the registry is the handoff between OpcUaServerService's // bootstrap-time population pass + OpcUaApplicationHost's StartAsync walker invocation. 
// DriverEquipmentContentRegistry.Get is the equipmentContentLookup delegate that PR #155 diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj b/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj index 9d870a1..c8aa856 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj +++ b/src/ZB.MOM.WW.OtOpcUa.Server/ZB.MOM.WW.OtOpcUa.Server.csproj @@ -34,6 +34,7 @@ + diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db b/src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db new file mode 100644 index 0000000000000000000000000000000000000000..ee9bad07f4bb7c0b3bb57efda16126fee962595b GIT binary patch literal 40960 zcmeI*zi!h&90%~R*oY;n|b^ z?RY%&lgVKYypsdJef~N9`r|O0cWXzf%g4Er<<~*9J=U?GE_!aYeo*&z|J`spoE^=_ z52v$s@LN5Coe>~FfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+0D=EsAhY@|gZyQ= zOwcp`Uv{rF{^hJ{`u}=-6AvF{UAml_RcXxG9C6u>IxAXLN;bQAzD)Sr z(6!ZQ_aJ0dN^9n1g#ZBp1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72<($UhVciB zZ2#iX|8`5jm%8D5SDXD}G9a?j*2=nT~(GyJDwZ8FX)-u2SbpfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+0D +/// Task #248 — covers the contract that +/// consumes. 
+/// +[Trait("Category", "Unit")] +public sealed class DriverFactoryRegistryTests +{ + private static IDriver FakeDriver(string id, string config) => new FakeIDriver(id); + + [Fact] + public void Register_then_TryGet_returns_factory() + { + var r = new DriverFactoryRegistry(); + r.Register("MyDriver", FakeDriver); + + r.TryGet("MyDriver").ShouldNotBeNull(); + r.TryGet("Nope").ShouldBeNull(); + } + + [Fact] + public void Register_is_case_insensitive() + { + var r = new DriverFactoryRegistry(); + r.Register("Galaxy", FakeDriver); + r.TryGet("galaxy").ShouldNotBeNull(); + r.TryGet("GALAXY").ShouldNotBeNull(); + } + + [Fact] + public void Register_duplicate_type_throws() + { + var r = new DriverFactoryRegistry(); + r.Register("Galaxy", FakeDriver); + Should.Throw(() => r.Register("Galaxy", FakeDriver)); + } + + [Fact] + public void Register_null_args_rejected() + { + var r = new DriverFactoryRegistry(); + Should.Throw(() => r.Register("", FakeDriver)); + Should.Throw(() => r.Register("X", null!)); + } + + [Fact] + public void RegisteredTypes_returns_snapshot() + { + var r = new DriverFactoryRegistry(); + r.Register("A", FakeDriver); + r.Register("B", FakeDriver); + r.RegisteredTypes.ShouldContain("A"); + r.RegisteredTypes.ShouldContain("B"); + } + + private sealed class FakeIDriver(string id) : IDriver + { + public string DriverInstanceId => id; + public string DriverType => "Fake"; + public Task InitializeAsync(string _, CancellationToken __) => Task.CompletedTask; + public Task ReinitializeAsync(string _, CancellationToken __) => Task.CompletedTask; + public Task ShutdownAsync(CancellationToken _) => Task.CompletedTask; + public Task FlushOptionalCachesAsync(CancellationToken _) => Task.CompletedTask; + public DriverHealth GetHealth() => new(DriverState.Healthy, null, null); + public long GetMemoryFootprint() => 0; + } +} From 2cb22598d6a5f6d0167a5f81df8486052509565c Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 20 Apr 2026 22:49:48 -0400 Subject: [PATCH 
2/2] Drop accidentally-committed LiteDB cache file + add to .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit (#248 wiring) inadvertently picked up src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db — generated by the live smoke re-run that proved the bootstrapper works. Remove from tracking + ignore going forward so future runs don't dirty the working tree. --- .gitignore | 3 +++ src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db | Bin 40960 -> 0 bytes 2 files changed, 3 insertions(+) delete mode 100644 src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db diff --git a/.gitignore b/.gitignore index 580c14c..196445f 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ packages/ .claude/ .local/ + +# LiteDB local config cache (Phase 6.1 Stream D — runtime artifact, not source) +src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db b/src/ZB.MOM.WW.OtOpcUa.Server/config_cache.db deleted file mode 100644 index ee9bad07f4bb7c0b3bb57efda16126fee962595b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40960 zcmeI*zi!h&90%~R*oY;n|b^ z?RY%&lgVKYypsdJef~N9`r|O0cWXzf%g4Er<<~*9J=U?GE_!aYeo*&z|J`spoE^=_ z52v$s@LN5Coe>~FfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+0D=EsAhY@|gZyQ= zOwcp`Uv{rF{^hJ{`u}=-6AvF{UAml_RcXxG9C6u>IxAXLN;bQAzD)Sr z(6!ZQ_aJ0dN^9n1g#ZBp1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72<($UhVciB zZ2#iX|8`5jm%8D5SDXD}G9a?j*2=nT~(GyJDwZ8FX)-u2SbpfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+0D