feat(runtime): F7 spawn lifecycle + F20 ShouldStub gate

DriverHostActor.ApplyAndAck now reads the deployment artifact and
reconciles its set of DriverInstanceActor children — spawn the missing,
ApplyDelta to those with changed config, stop the removed/disabled.
The diff lives in pure DriverSpawnPlanner so it can be unit-tested
without an ActorSystem.

Adds IDriverFactory in Core.Abstractions (consumed by Runtime) +
DriverFactoryRegistryAdapter in Core.Hosting that wraps the existing
v1 DriverFactoryRegistry — Runtime stays decoupled from Polly/Serilog,
the Host wires the adapter once driver assemblies have registered.

ShouldStub(type, roles) is now actually called on every spawn — Galaxy
+ Wonderware-Historian boot stubbed on macOS/Linux or whenever the host
carries the dev role. Missing factory ⇒ stub fallback, never a crash.

Tests: 24 → 34 in Runtime (+10):
- DriverSpawnPlannerTests x7 (diff cases, type change ⇒ stop+respawn)
- DeploymentArtifactTests  x5 (empty/malformed/missing fields tolerant)
- DriverHostActorReconcileTests x4 (spawn count, stub fallback,
  ShouldStub gate, second-apply stops the removed)
All 6 v2 test suites green: 120 tests passing.

Closes F20 (ShouldStub wired). F7 marked partial — subscription
publishing + write path still stubbed in DriverInstanceActor itself.
This commit is contained in:
Joseph Doherty
2026-05-26 08:57:16 -04:00
parent 9892ceae9a
commit da141497f8
10 changed files with 768 additions and 12 deletions

View File

@@ -0,0 +1,35 @@
namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// <summary>
/// Abstraction over the process-wide driver registry. Runtime consumes this instead of
/// <c>DriverFactoryRegistry</c> directly so the Runtime project doesn't pull in
/// <c>ZB.MOM.WW.OtOpcUa.Core</c> (which would drag in Polly + driver hosting). The fused
/// Host binds a <c>DriverFactoryRegistryAdapter</c> after every <c>Driver.*.Register()</c>
/// extension has run.
/// </summary>
public interface IDriverFactory
{
/// <summary>
/// Return a new <see cref="IDriver"/> for the given <paramref name="driverType"/>, or
/// <c>null</c> when no factory is registered for that type (missing assembly, typo, etc.).
/// The DriverHostActor logs + skips the row rather than failing the whole apply.
/// </summary>
IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson);
/// <summary>Driver-type names this factory can materialise. Mostly for diagnostics + logs.</summary>
IReadOnlyCollection<string> SupportedTypes { get; }
}
/// <summary>
/// Returns <c>null</c> from every <see cref="IDriverFactory.TryCreate"/> call. Bound when the
/// fused Host hasn't registered any concrete driver assemblies yet (Mac dev path, smoke
/// tests). DriverHostActor sees zero supported types and treats the deployment as a no-op.
/// </summary>
public sealed class NullDriverFactory : IDriverFactory
{
public static readonly NullDriverFactory Instance = new();
private NullDriverFactory() { }
public IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson) => null;
public IReadOnlyCollection<string> SupportedTypes { get; } = Array.Empty<string>();
}

View File

@@ -0,0 +1,28 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Core.Hosting;
/// <summary>
/// Adapts the existing <see cref="DriverFactoryRegistry"/> (v1 surface, still the
/// concrete singleton every driver assembly registers itself against) to the v2
/// <see cref="IDriverFactory"/> abstraction consumed by Runtime. The fused Host binds
/// this in DI once each <c>Driver.*.Register(registry)</c> call has completed.
/// </summary>
public sealed class DriverFactoryRegistryAdapter : IDriverFactory
{
private readonly DriverFactoryRegistry _registry;
public DriverFactoryRegistryAdapter(DriverFactoryRegistry registry)
{
ArgumentNullException.ThrowIfNull(registry);
_registry = registry;
}
public IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson)
{
var factory = _registry.TryGet(driverType);
return factory?.Invoke(driverInstanceId, driverConfigJson);
}
public IReadOnlyCollection<string> SupportedTypes => _registry.RegisteredTypes;
}

View File

@@ -0,0 +1,78 @@
using System.Text.Json;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// Minimal driver-side view of the deployment artifact emitted by
/// <c>ConfigComposer.SnapshotAndFlattenAsync</c>. The artifact JSON is the full snapshot —
/// for driver spawning we only need the <c>DriverInstances</c> array. Reading just the
/// subset keeps allocations cheap on every deploy.
/// </summary>
public sealed record DriverInstanceSpec(
Guid DriverInstanceRowId,
string DriverInstanceId,
string Name,
string DriverType,
bool Enabled,
string DriverConfig);
public static class DeploymentArtifact
{
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNameCaseInsensitive = true,
};
/// <summary>
/// Parse a deployment artifact blob into the list of driver-instance specs to spawn.
/// Empty / malformed blobs return an empty list — callers log + treat as "no drivers".
/// </summary>
public static IReadOnlyList<DriverInstanceSpec> ParseDriverInstances(ReadOnlySpan<byte> blob)
{
if (blob.IsEmpty) return Array.Empty<DriverInstanceSpec>();
try
{
using var doc = JsonDocument.Parse(blob.ToArray());
if (!doc.RootElement.TryGetProperty("DriverInstances", out var arr)
|| arr.ValueKind != JsonValueKind.Array)
{
return Array.Empty<DriverInstanceSpec>();
}
var result = new List<DriverInstanceSpec>(arr.GetArrayLength());
foreach (var el in arr.EnumerateArray())
{
if (el.ValueKind != JsonValueKind.Object) continue;
var spec = TryReadSpec(el);
if (spec is not null) result.Add(spec);
}
return result;
}
catch (JsonException)
{
return Array.Empty<DriverInstanceSpec>();
}
}
private static DriverInstanceSpec? TryReadSpec(JsonElement el)
{
var rowId = el.TryGetProperty("DriverInstanceRowId", out var rowEl)
&& rowEl.TryGetGuid(out var rid) ? rid : Guid.Empty;
var id = el.TryGetProperty("DriverInstanceId", out var idEl) ? idEl.GetString() : null;
var name = el.TryGetProperty("Name", out var nameEl) ? nameEl.GetString() : null;
var type = el.TryGetProperty("DriverType", out var typeEl) ? typeEl.GetString() : null;
var enabled = !el.TryGetProperty("Enabled", out var enEl) || enEl.GetBoolean();
var config = el.TryGetProperty("DriverConfig", out var cfgEl) ? cfgEl.GetString() : null;
if (string.IsNullOrWhiteSpace(id) || string.IsNullOrWhiteSpace(type)) return null;
return new DriverInstanceSpec(
DriverInstanceRowId: rowId,
DriverInstanceId: id!,
Name: name ?? id!,
DriverType: type!,
Enabled: enabled,
DriverConfig: config ?? "{}");
}
}

View File

@@ -9,6 +9,7 @@ using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using CommonsNodeId = ZB.MOM.WW.OtOpcUa.Commons.Types.NodeId;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
@@ -38,11 +39,17 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
private readonly CommonsNodeId _localNode;
private readonly IActorRef? _coordinatorOverride;
private readonly IDriverFactory _driverFactory;
private readonly IReadOnlySet<string> _localRoles;
private readonly ILoggingAdapter _log = Context.GetLogger();
private RevisionHash? _currentRevision;
private DeploymentId? _applyingDeploymentId;
private readonly Dictionary<string, ChildEntry> _children = new(StringComparer.Ordinal);
private sealed record ChildEntry(IActorRef Actor, string DriverType, string LastConfigJson, bool Stubbed);
public ITimerScheduler Timers { get; set; } = null!;
public sealed class RetryConfigDbConnection
@@ -54,17 +61,23 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
public static Props Props(
IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
CommonsNodeId localNode,
IActorRef? coordinator = null) =>
Akka.Actor.Props.Create(() => new DriverHostActor(dbFactory, localNode, coordinator));
IActorRef? coordinator = null,
IDriverFactory? driverFactory = null,
IReadOnlySet<string>? localRoles = null) =>
Akka.Actor.Props.Create(() => new DriverHostActor(dbFactory, localNode, coordinator, driverFactory, localRoles));
public DriverHostActor(
IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
CommonsNodeId localNode,
IActorRef? coordinator)
IActorRef? coordinator,
IDriverFactory? driverFactory = null,
IReadOnlySet<string>? localRoles = null)
{
_dbFactory = dbFactory;
_localNode = localNode;
_coordinatorOverride = coordinator;
_driverFactory = driverFactory ?? NullDriverFactory.Instance;
_localRoles = localRoles ?? new HashSet<string>(StringComparer.Ordinal);
// Default behavior is Steady — PreStart may flip to Stale or replay an orphan apply.
Become(Steady);
@@ -172,12 +185,19 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
private void HandleGetDiagnostics(GetDiagnostics msg)
{
// Driver-instance children aren't spawned yet (F7); the snapshot reports an empty driver
// list. CurrentRevision is real — it's what the host believes is its applied revision.
var drivers = _children
.Select(kv => new DriverInstanceDiagnostics(
DriverInstanceId: Guid.Empty,
Name: kv.Key,
State: kv.Value.Stubbed ? "Stubbed" : "Spawned",
ConnectedDevices: 0,
FaultedDevices: 0,
LastChangeUtc: DateTime.UtcNow))
.ToArray();
var snapshot = new NodeDiagnosticsSnapshot(
NodeId: _localNode,
CurrentRevision: _currentRevision,
Drivers: Array.Empty<DriverInstanceDiagnostics>(),
Drivers: drivers,
AsOfUtc: DateTime.UtcNow);
Sender.Tell(snapshot);
}
@@ -205,11 +225,12 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
try
{
// Future: dispatch ApplyDelta to children, wait for acks. For Task 37/38, just no-op.
ReconcileDrivers(deploymentId);
_currentRevision = revision;
UpsertNodeDeploymentState(deploymentId, NodeDeploymentStatus.Applied, failureReason: null);
SendAck(deploymentId, ApplyAckOutcome.Applied, failureReason: null, correlation);
_log.Info("DriverHost {Node}: applied deployment {Id} (rev {Rev})", _localNode, deploymentId, revision);
_log.Info("DriverHost {Node}: applied deployment {Id} (rev {Rev}, children={Count})",
_localNode, deploymentId, revision, _children.Count);
}
catch (Exception ex)
{
@@ -224,6 +245,126 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
}
}
/// <summary>
/// Read the deployment artifact + reconcile the set of running <see cref="DriverInstanceActor"/>
/// children. Spawn missing, ApplyDelta on config change, stop removed/disabled drivers.
/// When the artifact blob is empty (legacy ControlPlane tests, smoke fixtures) or the
/// configured <see cref="IDriverFactory"/> can't materialise any of the requested
/// types, this is effectively a no-op.
/// </summary>
private void ReconcileDrivers(DeploymentId deploymentId)
{
byte[] blob;
try
{
using var db = _dbFactory.CreateDbContext();
blob = db.Deployments.AsNoTracking()
.Where(d => d.DeploymentId == deploymentId.Value)
.Select(d => d.ArtifactBlob)
.FirstOrDefault() ?? Array.Empty<byte>();
}
catch (Exception ex)
{
_log.Warning(ex, "DriverHost {Node}: failed to load artifact for {Id}; skipping reconcile",
_localNode, deploymentId);
return;
}
var specs = DeploymentArtifact.ParseDriverInstances(blob);
var snapshots = _children.ToDictionary(
kv => kv.Key,
kv => new DriverChildSnapshot(kv.Value.DriverType, kv.Value.LastConfigJson),
StringComparer.Ordinal);
var plan = DriverSpawnPlanner.Compute(snapshots, specs);
foreach (var id in plan.ToStop) StopChild(id);
foreach (var spec in plan.ToApplyDelta) ApplyChildDelta(spec);
foreach (var spec in plan.ToSpawn) SpawnChild(spec);
}
private void SpawnChild(DriverInstanceSpec spec)
{
var stub = DriverInstanceActor.ShouldStub(spec.DriverType, _localRoles);
IDriver? driver = null;
if (!stub)
{
try { driver = _driverFactory.TryCreate(spec.DriverType, spec.DriverInstanceId, spec.DriverConfig); }
catch (Exception ex)
{
_log.Warning(ex, "DriverHost {Node}: factory for {Type} threw on {Id}; stubbing",
_localNode, spec.DriverType, spec.DriverInstanceId);
}
if (driver is null)
{
_log.Warning(
"DriverHost {Node}: no factory for driver type {Type} (instance {Id}); falling back to stub",
_localNode, spec.DriverType, spec.DriverInstanceId);
stub = true;
}
}
IActorRef child;
if (stub)
{
child = Context.ActorOf(
DriverInstanceActor.Props(new StubbedDriver(spec.DriverInstanceId, spec.DriverType),
reconnectInterval: null, startStubbed: true),
ActorNameFor(spec.DriverInstanceId));
}
else
{
child = Context.ActorOf(
DriverInstanceActor.Props(driver!),
ActorNameFor(spec.DriverInstanceId));
child.Tell(new DriverInstanceActor.InitializeRequested(spec.DriverConfig));
}
_children[spec.DriverInstanceId] = new ChildEntry(child, spec.DriverType, spec.DriverConfig, stub);
_log.Info("DriverHost {Node}: spawned {Type} driver {Id} (stub={Stub})",
_localNode, spec.DriverType, spec.DriverInstanceId, stub);
}
private void ApplyChildDelta(DriverInstanceSpec spec)
{
if (!_children.TryGetValue(spec.DriverInstanceId, out var entry)) return;
entry.Actor.Tell(new DriverInstanceActor.ApplyDelta(spec.DriverConfig, CorrelationId.NewId()));
_children[spec.DriverInstanceId] = entry with { LastConfigJson = spec.DriverConfig };
_log.Debug("DriverHost {Node}: ApplyDelta queued for {Id}", _localNode, spec.DriverInstanceId);
}
private void StopChild(string driverInstanceId)
{
if (!_children.TryGetValue(driverInstanceId, out var entry)) return;
Context.Stop(entry.Actor);
_children.Remove(driverInstanceId);
_log.Info("DriverHost {Node}: stopped driver child {Id}", _localNode, driverInstanceId);
}
private static string ActorNameFor(string driverInstanceId)
{
// Akka actor names cannot contain '/', ':', or whitespace. Mangle defensively.
var chars = driverInstanceId.Select(c => char.IsLetterOrDigit(c) || c is '-' or '_' or '.' ? c : '_').ToArray();
return "drv-" + new string(chars);
}
/// <summary>
/// Minimal placeholder driver used when no factory is registered for a driver type or when
/// <see cref="DriverInstanceActor.ShouldStub"/> returns true. <see cref="DriverInstanceActor"/>
/// is started with <c>startStubbed:true</c> so the driver methods on this object never run.
/// </summary>
private sealed class StubbedDriver : IDriver
{
public string DriverInstanceId { get; }
public string DriverType { get; }
public StubbedDriver(string id, string type) { DriverInstanceId = id; DriverType = type; }
public Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken) => Task.CompletedTask;
public Task ReinitializeAsync(string driverConfigJson, CancellationToken cancellationToken) => Task.CompletedTask;
public Task ShutdownAsync(CancellationToken cancellationToken) => Task.CompletedTask;
public DriverHealth GetHealth() => new(DriverState.Healthy, DateTime.UtcNow, LastError: null);
public long GetMemoryFootprint() => 0;
public Task FlushOptionalCachesAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}
private void TryRecoverFromStale()
{
try

View File

@@ -0,0 +1,67 @@
namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
/// <summary>
/// Pure diff between the currently-running driver children (keyed by
/// <c>DriverInstance.DriverInstanceId</c>) and the target spec list from a freshly-applied
/// deployment artifact. The DriverHostActor consumes the three lists and calls
/// spawn / ApplyDelta / stop on its child actors accordingly.
/// </summary>
/// <param name="ToSpawn">Specs with no current child — create a new actor.</param>
/// <param name="ToApplyDelta">Specs whose child exists but config JSON or type differs.</param>
/// <param name="ToStop">DriverInstanceIds currently running but missing from the new artifact, or now disabled.</param>
public sealed record DriverSpawnPlan(
IReadOnlyList<DriverInstanceSpec> ToSpawn,
IReadOnlyList<DriverInstanceSpec> ToApplyDelta,
IReadOnlyList<string> ToStop);
public static class DriverSpawnPlanner
{
/// <summary>
/// Compute the spawn/delta/stop sets. Disabled entries in <paramref name="target"/> are
/// treated as "not desired here": if a child exists for the id it goes into ToStop,
/// otherwise the row is dropped entirely (no spawn for a disabled driver).
/// </summary>
public static DriverSpawnPlan Compute(
IReadOnlyDictionary<string, DriverChildSnapshot> current,
IReadOnlyList<DriverInstanceSpec> target)
{
var toSpawn = new List<DriverInstanceSpec>();
var toDelta = new List<DriverInstanceSpec>();
var toStop = new List<string>();
var targetById = new Dictionary<string, DriverInstanceSpec>(StringComparer.Ordinal);
foreach (var spec in target) targetById[spec.DriverInstanceId] = spec;
foreach (var (id, snap) in current)
{
if (!targetById.TryGetValue(id, out var spec) || !spec.Enabled)
{
toStop.Add(id);
continue;
}
// Driver type changes can't be reinitialized in-place (factory-bound) — stop + respawn.
if (!string.Equals(snap.DriverType, spec.DriverType, StringComparison.Ordinal))
{
toStop.Add(id);
toSpawn.Add(spec);
continue;
}
if (!string.Equals(snap.LastConfigJson, spec.DriverConfig, StringComparison.Ordinal))
{
toDelta.Add(spec);
}
}
foreach (var (id, spec) in targetById)
{
if (!spec.Enabled) continue;
if (current.ContainsKey(id)) continue;
toSpawn.Add(spec);
}
return new DriverSpawnPlan(toSpawn, toDelta, toStop);
}
}
/// <summary>Snapshot of one running driver child as the host sees it. Used as the diff input.</summary>
public sealed record DriverChildSnapshot(string DriverType, string LastConfigJson);

View File

@@ -5,6 +5,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using ZB.MOM.WW.OtOpcUa.Runtime.Health;
@@ -29,6 +30,7 @@ public static class ServiceCollectionExtensions
public static IServiceCollection AddOtOpcUaRuntime(this IServiceCollection services)
{
services.TryAddSingleton<IAlarmHistorianSink>(NullAlarmHistorianSink.Instance);
services.TryAddSingleton<IDriverFactory>(NullDriverFactory.Instance);
return services;
}
@@ -54,8 +56,9 @@ public static class ServiceCollectionExtensions
{
var dbFactory = resolver.GetService<IDbContextFactory<OtOpcUaConfigDbContext>>();
var roleInfo = resolver.GetService<IClusterRoleInfo>();
// Fallback to NullAlarmHistorianSink if AddOtOpcUaRuntime wasn't called (e.g., test harnesses).
// Fallback to Null* if AddOtOpcUaRuntime wasn't called (e.g., test harnesses).
var historianSink = resolver.GetService<IAlarmHistorianSink>() ?? NullAlarmHistorianSink.Instance;
var driverFactory = resolver.GetService<IDriverFactory>() ?? NullDriverFactory.Instance;
var dbHealth = system.ActorOf(
DbHealthProbeActor.Props(dbFactory),
@@ -63,7 +66,8 @@ public static class ServiceCollectionExtensions
registry.Register<DbHealthProbeActorKey>(dbHealth);
var driverHost = system.ActorOf(
DriverHostActor.Props(dbFactory, roleInfo.LocalNode),
DriverHostActor.Props(dbFactory, roleInfo.LocalNode, coordinator: null,
driverFactory: driverFactory, localRoles: roleInfo.LocalRoles),
DriverHostActorName);
registry.Register<DriverHostActorKey>(driverHost);